def test_multi_class_best_ntree_limit():
    """XGBoost multiclass: with ``best_ntree_limit = 1`` the assembler must
    emit only the first boosting round (one stump) per class, combined with a
    hand-rolled softmax.

    NOTE(review): other tests in this SOURCE share this name; if they end up
    in one module the later definition shadows this one — confirm they belong
    to separate test modules.
    """
    base_score = 0.5
    estimator = xgboost.XGBClassifier(n_estimators=100, random_state=1,
                                      max_depth=1, base_score=base_score)
    # Restrict scoring to the first tree of every class.
    estimator.best_ntree_limit = 1

    utils.train_model_classification(estimator)

    assembler = assemblers.XGBoostModelAssembler(estimator)
    actual = assembler.assemble()

    # exp(base_score + stump margin) per class; to_reuse=True because each
    # term appears both in the numerator and in the softmax denominator.
    estimator_exp_class1 = ast.ExpExpr(ast.SubroutineExpr(
        ast.BinNumExpr(
            ast.NumVal(0.5),
            ast.IfExpr(
                ast.CompExpr(ast.FeatureRef(2),
                             ast.NumVal(2.5999999),
                             ast.CompOpType.GTE),
                ast.NumVal(-0.0731707439),
                ast.NumVal(0.142857149)),
            ast.BinNumOpType.ADD)), to_reuse=True)
    estimator_exp_class2 = ast.ExpExpr(ast.SubroutineExpr(
        ast.BinNumExpr(
            ast.NumVal(0.5),
            ast.IfExpr(
                ast.CompExpr(ast.FeatureRef(2),
                             ast.NumVal(2.5999999),
                             ast.CompOpType.GTE),
                ast.NumVal(0.0341463387),
                ast.NumVal(-0.0714285821)),
            ast.BinNumOpType.ADD)), to_reuse=True)
    estimator_exp_class3 = ast.ExpExpr(ast.SubroutineExpr(
        ast.BinNumExpr(
            ast.NumVal(0.5),
            ast.IfExpr(
                ast.CompExpr(ast.FeatureRef(2),
                             ast.NumVal(4.85000038),
                             ast.CompOpType.GTE),
                ast.NumVal(0.129441619),
                ast.NumVal(-0.0681440532)),
            ast.BinNumOpType.ADD)), to_reuse=True)

    # Softmax denominator: left-associated sum of the three exponentials.
    exp_sum = ast.BinNumExpr(
        ast.BinNumExpr(estimator_exp_class1, estimator_exp_class2,
                       ast.BinNumOpType.ADD),
        estimator_exp_class3,
        ast.BinNumOpType.ADD,
        to_reuse=True)

    expected = ast.VectorVal([
        ast.BinNumExpr(estimator_exp_class1, exp_sum, ast.BinNumOpType.DIV),
        ast.BinNumExpr(estimator_exp_class2, exp_sum, ast.BinNumOpType.DIV),
        ast.BinNumExpr(estimator_exp_class3, exp_sum, ast.BinNumOpType.DIV)
    ])

    assert utils.cmp_exprs(actual, expected)
def test_multi_class():
    """A three-leaf decision tree over classes 0/1/2 must assemble into
    nested IfExpr nodes that yield one-hot probability vectors.
    """
    clf = tree.DecisionTreeClassifier()
    clf.fit([[1], [2], [3]], [0, 1, 2])

    result = assemblers.TreeModelAssembler(clf).assemble()

    def one_hot(cls_idx):
        # Probability vector with 1.0 at the predicted class index.
        return ast.VectorVal(
            [ast.NumVal(1.0 if pos == cls_idx else 0.0) for pos in range(3)])

    reference = ast.IfExpr(
        ast.CompExpr(ast.FeatureRef(0), ast.NumVal(1.5), ast.CompOpType.LTE),
        one_hot(0),
        ast.IfExpr(
            ast.CompExpr(ast.FeatureRef(0), ast.NumVal(2.5),
                         ast.CompOpType.LTE),
            one_hot(1),
            one_hot(2)))

    assert utils.cmp_exprs(result, reference)
def kernel_ast(sup_vec_value):
    """Build the linear-kernel term: support-vector value times feature 0,
    wrapped in a SubroutineExpr.
    """
    product = ast.BinNumExpr(
        ast.NumVal(sup_vec_value),
        ast.FeatureRef(0),
        ast.BinNumOpType.MUL)
    return ast.SubroutineExpr(product)
def test_statsmodels_processmle():
    """ProcessMLE regression must assemble into a weighted feature sum with a
    0.0 intercept seed.

    The init kwargs build the exogenous scale/smooth/noise matrices and the
    time/groups vectors ProcessMLE requires; maxiter=1 keeps the fit fast and
    deterministic for the hard-coded coefficients below.
    """
    estimator = utils.StatsmodelsSklearnLikeWrapper(
        ProcessMLE,
        dict(init=dict(
            exog_scale=np.ones(
                (len(utils.get_regression_model_trainer().y_train), 2)),
            exog_smooth=np.ones(
                (len(utils.get_regression_model_trainer().y_train), 2)),
            exog_noise=np.ones(
                (len(utils.get_regression_model_trainer().y_train), 2)),
            time=np.kron(
                np.ones(
                    len(utils.get_regression_model_trainer().y_train) // 3),
                np.arange(3)),
            groups=np.kron(
                np.arange(
                    len(utils.get_regression_model_trainer().y_train) // 3),
                np.ones(3))),
            fit=dict(maxiter=1)))
    _, __, estimator = utils.get_regression_model_trainer()(estimator)

    assembler = assemblers.ProcessMLEModelAssembler(estimator)
    actual = assembler.assemble()

    # One (feature * learned coefficient) product per input feature.
    feature_weight_mul = [
        ast.BinNumExpr(ast.FeatureRef(0), ast.NumVal(-0.0980302102110356), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(1), ast.NumVal(0.04863869398287732), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(2), ast.NumVal(0.009514054355147874), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(3), ast.NumVal(2.977113829322681), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(4), ast.NumVal(-2.6048073854474705), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(5), ast.NumVal(5.887987153279099), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(6), ast.NumVal(-0.008183580358672775), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(7), ast.NumVal(-0.996428929917054), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(8), ast.NumVal(0.1618353156581333), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(9), ast.NumVal(-0.009213049690188308), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(10), ast.NumVal(-0.3634816838591863), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(11), ast.NumVal(0.014700492832969888), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(12), ast.NumVal(-0.4384298738156768), ast.BinNumOpType.MUL),
    ]

    expected = assemblers.utils.apply_op_to_expressions(
        ast.BinNumOpType.ADD,
        ast.NumVal(0.0),
        *feature_weight_mul)

    assert utils.cmp_exprs(actual, expected)
def test_lightning_binary_class():
    """AdaGradClassifier must assemble into a plain dot product: the sum of
    30 per-feature (feature * coefficient) terms seeded with a 0.0 intercept.
    """
    estimator = AdaGradClassifier(random_state=1)
    utils.get_binary_classification_model_trainer()(estimator)

    assembler = assemblers.SklearnLinearModelAssembler(estimator)
    actual = assembler.assemble()

    # Learned coefficient for each of the 30 input features.
    feature_weight_mul = [
        ast.BinNumExpr(ast.FeatureRef(0), ast.NumVal(0.16218889967390476), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(1), ast.NumVal(0.10012761963766906), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(2), ast.NumVal(0.6289276652681673), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(3), ast.NumVal(0.17618420156072845), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(4), ast.NumVal(0.0010492096607182045), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(5), ast.NumVal(-0.0029135563693806913), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(6), ast.NumVal(-0.005923882409142498), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(7), ast.NumVal(-0.0023293599172479755), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(8), ast.NumVal(0.0020808828960210517), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(9), ast.NumVal(0.0009846430705550103), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(10), ast.NumVal(0.0010399810925427265), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(11), ast.NumVal(0.011203056917272093), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(12), ast.NumVal(-0.007271351370867731), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(13), ast.NumVal(-0.26333437096804224), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(14), ast.NumVal(1.8533543368532444e-05), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(15), ast.NumVal(-0.0008266341686278445), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(16), ast.NumVal(-0.0011090316301215724), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(17), ast.NumVal(-0.0001910857095336291), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(18), ast.NumVal(0.00010735116208006556), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(19), ast.NumVal(-4.076097659514017e-05), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(20), ast.NumVal(0.15300712110146406), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(21), ast.NumVal(0.06316277258339074), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(22), ast.NumVal(0.495291178977687), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(23), ast.NumVal(-0.29589136204657845), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(24), ast.NumVal(0.000771932729567487), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(25), ast.NumVal(-0.011877978242492428), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(26), ast.NumVal(-0.01678004536869617), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(27), ast.NumVal(-0.004070431062579625), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(28), ast.NumVal(0.001158641497209262), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(29), ast.NumVal(0.00010737287732588742), ast.BinNumOpType.MUL),
    ]

    expected = assemblers.utils.apply_op_to_expressions(
        ast.BinNumOpType.ADD,
        ast.NumVal(0.0),
        *feature_weight_mul)

    assert utils.cmp_exprs(actual, expected)
def test_lightning_multi_class_rbf_kernel():
    """KernelSVC with an RBF kernel on three 1-D points: per class, the output
    is sum(coef_i * K(x, sv_i)) accumulated left-to-right from a 0.0 seed.

    Relies on the module-level `_rbf_kernel_ast` helper to build the kernel
    sub-expression for each support vector.
    """
    estimator = KernelSVC(kernel="rbf", random_state=1, gamma=2.0)
    estimator.fit(np.array([[1], [2], [3]]), np.array([1, 2, 3]))

    assembler = assemblers.LightningSVMModelAssembler(estimator)
    actual = assembler.assemble()

    # Kernel expressions for support vectors [1.0], [2.0], [3.0].
    kernels = [
        _rbf_kernel_ast(estimator, float(i)) for i in range(1, 4)
    ]

    expected = ast.VectorVal([
        ast.BinNumExpr(
            ast.BinNumExpr(
                ast.BinNumExpr(
                    ast.NumVal(0.0),
                    ast.BinNumExpr(
                        kernels[0],
                        ast.NumVal(0.5342246289),
                        ast.BinNumOpType.MUL),
                    ast.BinNumOpType.ADD),
                ast.BinNumExpr(
                    kernels[1],
                    ast.NumVal(-0.5046204480),
                    ast.BinNumOpType.MUL),
                ast.BinNumOpType.ADD),
            ast.BinNumExpr(
                kernels[2],
                ast.NumVal(-0.4659431306),
                ast.BinNumOpType.MUL),
            ast.BinNumOpType.ADD),
        ast.BinNumExpr(
            ast.BinNumExpr(
                ast.BinNumExpr(
                    ast.NumVal(0.0),
                    ast.BinNumExpr(
                        kernels[0],
                        ast.NumVal(-0.5386765707),
                        ast.BinNumOpType.MUL),
                    ast.BinNumOpType.ADD),
                ast.BinNumExpr(
                    kernels[1],
                    ast.NumVal(0.5729019463),
                    ast.BinNumOpType.MUL),
                ast.BinNumOpType.ADD),
            ast.BinNumExpr(
                kernels[2],
                ast.NumVal(-0.5386765707),
                ast.BinNumOpType.MUL),
            ast.BinNumOpType.ADD),
        ast.BinNumExpr(
            ast.BinNumExpr(
                ast.BinNumExpr(
                    ast.NumVal(0.0),
                    ast.BinNumExpr(
                        kernels[0],
                        ast.NumVal(-0.4659431306),
                        ast.BinNumOpType.MUL),
                    ast.BinNumOpType.ADD),
                ast.BinNumExpr(
                    kernels[1],
                    ast.NumVal(-0.5046204480),
                    ast.BinNumOpType.MUL),
                ast.BinNumOpType.ADD),
            ast.BinNumExpr(
                kernels[2],
                ast.NumVal(0.5342246289),
                ast.BinNumOpType.MUL),
            ast.BinNumOpType.ADD)])

    assert utils.cmp_exprs(actual, expected)
def test_statsmodels_wo_const():
    """GLS without an intercept must assemble into a weighted feature sum
    seeded with 0.0.

    NOTE(review): another test with this name appears later in this SOURCE —
    confirm the two belong to separate test modules.
    """
    estimator = utils.StatsmodelsSklearnLikeWrapper(sm.GLS, {})
    _, __, estimator = utils.get_regression_model_trainer()(estimator)

    assembler = assemblers.StatsmodelsModelAssemblerSelector(estimator)
    actual = assembler.assemble()

    # Learned coefficient for each of the 13 input features.
    feature_weight_mul = [
        ast.BinNumExpr(ast.FeatureRef(0), ast.NumVal(-0.09519078450227643), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(1), ast.NumVal(0.048952926782237956), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(2), ast.NumVal(0.007485539189808044), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(3), ast.NumVal(2.7302631809978273), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(4), ast.NumVal(-2.5078200782168034), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(5), ast.NumVal(5.891794660307579), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(6), ast.NumVal(-0.008663096157185936), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(7), ast.NumVal(-0.9742684875268565), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(8), ast.NumVal(0.1591703441858682), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(9), ast.NumVal(-0.009351831548409096), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(10), ast.NumVal(-0.36395034626096245), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(11), ast.NumVal(0.014529018124980565), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(12), ast.NumVal(-0.437443877026267), ast.BinNumOpType.MUL),
    ]

    expected = assemblers.utils.apply_op_to_expressions(
        ast.BinNumOpType.ADD,
        ast.NumVal(0.0),
        *feature_weight_mul)

    assert utils.cmp_exprs(actual, expected)
def test_multi_class_best_ntree_limit():
    """Selector-based variant of the best_ntree_limit test: one stump per
    class, each tree additionally wrapped in its own SubroutineExpr, combined
    with a hand-rolled softmax.
    """
    base_score = 0.5
    estimator = xgboost.XGBClassifier(n_estimators=100, random_state=1,
                                      max_depth=1, base_score=base_score)
    # Restrict scoring to the first tree of every class.
    estimator.best_ntree_limit = 1

    utils.train_model_classification(estimator)

    assembler = assemblers.XGBoostModelAssemblerSelector(estimator)
    actual = assembler.assemble()

    # exp(base_score + stump margin) per class; reused in the softmax ratio.
    estimator_exp_class1 = ast.ExpExpr(ast.SubroutineExpr(
        ast.BinNumExpr(
            ast.NumVal(0.5),
            ast.SubroutineExpr(
                ast.IfExpr(
                    ast.CompExpr(ast.FeatureRef(2),
                                 ast.NumVal(2.45000005),
                                 ast.CompOpType.GTE),
                    ast.NumVal(-0.0733167157),
                    ast.NumVal(0.143414631))),
            ast.BinNumOpType.ADD)), to_reuse=True)
    estimator_exp_class2 = ast.ExpExpr(ast.SubroutineExpr(
        ast.BinNumExpr(
            ast.NumVal(0.5),
            ast.SubroutineExpr(
                ast.IfExpr(
                    ast.CompExpr(ast.FeatureRef(2),
                                 ast.NumVal(2.45000005),
                                 ast.CompOpType.GTE),
                    ast.NumVal(0.0344139598),
                    ast.NumVal(-0.0717073306))),
            ast.BinNumOpType.ADD)), to_reuse=True)
    estimator_exp_class3 = ast.ExpExpr(ast.SubroutineExpr(
        ast.BinNumExpr(
            ast.NumVal(0.5),
            ast.SubroutineExpr(
                ast.IfExpr(
                    ast.CompExpr(ast.FeatureRef(3),
                                 ast.NumVal(1.6500001),
                                 ast.CompOpType.GTE),
                    ast.NumVal(0.13432835),
                    ast.NumVal(-0.0644444525))),
            ast.BinNumOpType.ADD)), to_reuse=True)

    # Softmax denominator: left-associated sum of the three exponentials.
    exp_sum = ast.BinNumExpr(
        ast.BinNumExpr(estimator_exp_class1, estimator_exp_class2,
                       ast.BinNumOpType.ADD),
        estimator_exp_class3,
        ast.BinNumOpType.ADD,
        to_reuse=True)

    expected = ast.VectorVal([
        ast.BinNumExpr(estimator_exp_class1, exp_sum, ast.BinNumOpType.DIV),
        ast.BinNumExpr(estimator_exp_class2, exp_sum, ast.BinNumOpType.DIV),
        ast.BinNumExpr(estimator_exp_class3, exp_sum, ast.BinNumOpType.DIV)
    ])

    assert utils.cmp_exprs(actual, expected)
def test_linear_model():
    """gblinear XGBRegressor: expected AST is base_score (0.5) plus intercept
    plus per-feature weight products, all inside one SubroutineExpr.
    """
    estimator = xgboost.XGBRegressor(n_estimators=2, random_state=1,
                                     feature_selector="shuffle",
                                     booster="gblinear")
    utils.train_model_regression(estimator)

    assembler = assemblers.XGBoostModelAssemblerSelector(estimator)
    actual = assembler.assemble()

    # Learned gblinear weight for each of the 13 input features.
    feature_weight_mul = [
        ast.BinNumExpr(ast.FeatureRef(0), ast.NumVal(-0.00999326), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(1), ast.NumVal(0.0520094), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(2), ast.NumVal(0.10447), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(3), ast.NumVal(0.17387), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(4), ast.NumVal(0.691745), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(5), ast.NumVal(0.296357), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(6), ast.NumVal(0.0288206), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(7), ast.NumVal(0.417822), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(8), ast.NumVal(0.0551116), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(9), ast.NumVal(0.00242449), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(10), ast.NumVal(0.109585), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(11), ast.NumVal(0.00744202), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(12), ast.NumVal(0.0731089), ast.BinNumOpType.MUL),
    ]

    # 3.13109 is the learned bias; 0.5 is the model's base_score.
    expected = ast.SubroutineExpr(
        ast.BinNumExpr(
            ast.NumVal(0.5),
            assemblers.utils.apply_op_to_expressions(ast.BinNumOpType.ADD,
                                                     ast.NumVal(3.13109),
                                                     *feature_weight_mul),
            ast.BinNumOpType.ADD))

    assert utils.cmp_exprs(actual, expected)
def test_count_exprs_exclude_list(): assert ast.count_exprs(ast.BinNumExpr(ast.NumVal(1), ast.NumVal(2), ast.BinNumOpType.ADD), exclude_list={ast.BinExpr, ast.NumVal}) == 0 assert ast.count_exprs(ast.BinNumExpr(ast.NumVal(1), ast.NumVal(2), ast.BinNumOpType.ADD), exclude_list={ast.BinNumExpr}) == 2 EXPR_WITH_ALL_EXPRS = ast.BinVectorNumExpr( ast.BinVectorExpr( ast.VectorVal([ ast.AbsExpr(ast.NumVal(-2)), ast.ExpExpr(ast.NumVal(2)), ast.LogExpr(ast.NumVal(2)), ast.Log1pExpr(ast.NumVal(2)), ast.SqrtExpr(ast.NumVal(2)), ast.PowExpr(ast.NumVal(2), ast.NumVal(3)), ast.TanhExpr(ast.NumVal(1)), ast.BinNumExpr(ast.NumVal(0), ast.FeatureRef(0), ast.BinNumOpType.ADD) ]), ast.IdExpr( ast.VectorVal([ ast.NumVal(1), ast.NumVal(2), ast.NumVal(3), ast.NumVal(4),
def test_num_val():
    """NumVal must default its value to np.float64 and honour an explicit
    `dtype` argument exactly (no silent widening/narrowing).
    """
    # `is` is the correct exact-type identity check; `==` on type objects is
    # the E721 anti-pattern (works by accident via identity fallback).
    assert type(ast.NumVal(1).value) is np.float64
    assert type(ast.NumVal(1, dtype=np.float32).value) is np.float32
    assert type(ast.NumVal(1, dtype=np.float64).value) is np.float64
    assert type(ast.NumVal(1, dtype=np.int8).value) is np.int8
    assert type(ast.NumVal(1, dtype=int).value) is int
def test_deep_mixed_exprs_exceeding_threshold():
    """RInterpreter must hoist deep BinNumExpr chains into numbered
    subroutines once the configured depth/size thresholds are exceeded.
    """
    expr = ast.NumVal(1)
    for i in range(4):
        inner = ast.NumVal(1)
        for j in range(4):
            inner = ast.BinNumExpr(ast.NumVal(i), inner, ast.BinNumOpType.ADD)
        # NOTE: uses `j` leaked from the inner loop (always 3 here).
        expr = ast.IfExpr(
            ast.CompExpr(inner, ast.NumVal(j), ast.CompOpType.EQ),
            ast.NumVal(1), expr)

    expected_code = """ score <- function(input) { var1 <- subroutine0(input) if (((3.0) + (var1)) == (3.0)) { var0 <- 1.0 } else { var2 <- subroutine1(input) if (((2.0) + (var2)) == (3.0)) { var0 <- 1.0 } else { var3 <- subroutine2(input) if (((1.0) + (var3)) == (3.0)) { var0 <- 1.0 } else { var4 <- subroutine3(input) if (((0.0) + (var4)) == (3.0)) { var0 <- 1.0 } else { var0 <- 1.0 } } } } return(var0) } subroutine0 <- function(input) { var0 <- (3.0) + (1.0) var1 <- (3.0) + (var0) return((3.0) + (var1)) } subroutine1 <- function(input) { var0 <- (2.0) + (1.0) var1 <- (2.0) + (var0) return((2.0) + (var1)) } subroutine2 <- function(input) { var0 <- (1.0) + (1.0) var1 <- (1.0) + (var0) return((1.0) + (var1)) } subroutine3 <- function(input) { var0 <- (0.0) + (1.0) var1 <- (0.0) + (var0) return((0.0) + (var1)) } """

    interpreter = RInterpreter()
    # Aggressive thresholds so even this small AST triggers subroutine
    # extraction.
    interpreter.bin_depth_threshold = 1
    interpreter.ast_size_check_frequency = 2
    interpreter.ast_size_per_subroutine_threshold = 6

    assert_code_equal(interpreter.interpret(expr), expected_code)
def test_linear_model():
    """gblinear XGBRegressor with the deterministic coord_descent updater:
    expected AST is base_score (0.5) + intercept + per-feature products.

    NOTE(review): shares its name with another test in this SOURCE — confirm
    they live in separate test modules.
    """
    # Default updater ("shotgun") is nondeterministic
    estimator = xgboost.XGBRegressor(n_estimators=2, random_state=1,
                                     updater="coord_descent",
                                     feature_selector="shuffle",
                                     booster="gblinear")
    utils.get_regression_model_trainer()(estimator)

    assembler = assemblers.XGBoostModelAssemblerSelector(estimator)
    actual = assembler.assemble()

    # Learned gblinear weight for each of the 13 input features.
    feature_weight_mul = [
        ast.BinNumExpr(ast.FeatureRef(0), ast.NumVal(-0.154567), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(1), ast.NumVal(0.0815865), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(2), ast.NumVal(-0.0979713), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(3), ast.NumVal(4.80472), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(4), ast.NumVal(1.35478), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(5), ast.NumVal(0.327222), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(6), ast.NumVal(0.0610654), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(7), ast.NumVal(0.46989), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(8), ast.NumVal(-0.0674318), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(9), ast.NumVal(-0.000506212), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(10), ast.NumVal(0.0732867), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(11), ast.NumVal(0.0108842), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(12), ast.NumVal(-0.140096), ast.BinNumOpType.MUL),
    ]

    # 11.138 is the learned bias; 0.5 is the model's base_score.
    expected = ast.BinNumExpr(
        ast.NumVal(0.5),
        assemblers.utils.apply_op_to_expressions(ast.BinNumOpType.ADD,
                                                 ast.NumVal(11.138),
                                                 *feature_weight_mul),
        ast.BinNumOpType.ADD)

    assert utils.cmp_exprs(actual, expected)
def test_multi_class_best_ntree_limit():
    """SoftmaxExpr variant of the best_ntree_limit test: the limit is set on
    the underlying booster after training, and the three per-class margins
    are combined with a first-class SoftmaxExpr node instead of a hand-rolled
    exp/sum/div chain.
    """
    base_score = 0.5
    estimator = xgboost.XGBClassifier(n_estimators=100, random_state=1,
                                      max_depth=1, base_score=base_score)
    utils.get_classification_model_trainer()(estimator)
    # This variant sets the limit on the booster, post-training.
    estimator.get_booster().best_ntree_limit = 1

    assembler = assemblers.XGBoostModelAssemblerSelector(estimator)
    actual = assembler.assemble()

    # base_score + single-stump margin per class.
    estimator_class1 = ast.BinNumExpr(
        ast.NumVal(0.5),
        ast.IfExpr(
            ast.CompExpr(ast.FeatureRef(2),
                         ast.NumVal(2.450000047683716),
                         ast.CompOpType.GTE),
            ast.NumVal(-0.21995015442371368),
            ast.NumVal(0.43024390935897827)),
        ast.BinNumOpType.ADD)
    estimator_class2 = ast.BinNumExpr(
        ast.NumVal(0.5),
        ast.IfExpr(
            ast.CompExpr(ast.FeatureRef(2),
                         ast.NumVal(2.450000047683716),
                         ast.CompOpType.GTE),
            ast.NumVal(0.10324188321828842),
            ast.NumVal(-0.21512198448181152)),
        ast.BinNumOpType.ADD)
    estimator_class3 = ast.BinNumExpr(
        ast.NumVal(0.5),
        ast.IfExpr(
            ast.CompExpr(ast.FeatureRef(3),
                         ast.NumVal(1.6500000953674316),
                         ast.CompOpType.GTE),
            ast.NumVal(0.4029850661754608),
            ast.NumVal(-0.19333337247371674)),
        ast.BinNumOpType.ADD)

    expected = ast.SoftmaxExpr(
        [estimator_class1, estimator_class2, estimator_class3])

    assert utils.cmp_exprs(actual, expected)
def test_log1p_expr():
    """CSharpInterpreter must emit a self-contained Log1p helper (with its
    ChebyshevBroucke series evaluator) alongside the scoring entry point.
    """
    expr = ast.Log1pExpr(ast.NumVal(2.0))

    # Expected code kept byte-for-byte; assert_code_equal compares it against
    # the interpreter output.
    expected_code = """ using static System.Math; namespace ML { public static class Model { public static double Score(double[] input) { return Log1p(2.0); } private static double Log1p(double x) { if (x == 0.0) return 0.0; if (x == -1.0) return double.NegativeInfinity; if (x < -1.0) return double.NaN; double xAbs = Abs(x); if (xAbs < 0.5 * double.Epsilon) return x; if ((x > 0.0 && x < 1e-8) || (x > -1e-9 && x < 0.0)) return x * (1.0 - x * 0.5); if (xAbs < 0.375) { double[] coeffs = { 0.10378693562743769800686267719098e+1, -0.13364301504908918098766041553133e+0, 0.19408249135520563357926199374750e-1, -0.30107551127535777690376537776592e-2, 0.48694614797154850090456366509137e-3, -0.81054881893175356066809943008622e-4, 0.13778847799559524782938251496059e-4, -0.23802210894358970251369992914935e-5, 0.41640416213865183476391859901989e-6, -0.73595828378075994984266837031998e-7, 0.13117611876241674949152294345011e-7, -0.23546709317742425136696092330175e-8, 0.42522773276034997775638052962567e-9, -0.77190894134840796826108107493300e-10, 0.14075746481359069909215356472191e-10, -0.25769072058024680627537078627584e-11, 0.47342406666294421849154395005938e-12, -0.87249012674742641745301263292675e-13, 0.16124614902740551465739833119115e-13, -0.29875652015665773006710792416815e-14, 0.55480701209082887983041321697279e-15, -0.10324619158271569595141333961932e-15}; return x * (1.0 - x * ChebyshevBroucke(x / 0.375, coeffs)); } return Log(1.0 + x); } private static double ChebyshevBroucke(double x, double[] coeffs) { double b0, b1, b2, x2; b2 = b1 = b0 = 0.0; x2 = x * 2; for (int i = coeffs.Length - 1; i >= 0; --i) { b2 = b1; b1 = b0; b0 = x2 * b1 - b2 + coeffs[i]; } return (b0 - b2) * 0.5; } } } """

    interpreter = CSharpInterpreter()
    utils.assert_code_equal(interpreter.interpret(expr), expected_code)
def test_binary_classification():
    """XGBoost binary classifier: two stumps assemble into
    sigmoid(margin) = 1 / (1 + exp(0 - margin)); the output vector is
    [1 - sigmoid, sigmoid].

    NOTE(review): shares its name with a LightGBM test in this SOURCE —
    confirm they live in separate test modules.
    """
    estimator = xgboost.XGBClassifier(n_estimators=2, random_state=1,
                                      max_depth=1)
    utils.train_model_classification_binary(estimator)

    assembler = assemblers.XGBoostModelAssemblerSelector(estimator)
    actual = assembler.assemble()

    # to_reuse=True: the sigmoid appears twice in the output vector.
    sigmoid = ast.BinNumExpr(
        ast.NumVal(1),
        ast.BinNumExpr(
            ast.NumVal(1),
            ast.ExpExpr(
                ast.BinNumExpr(
                    ast.NumVal(0),
                    ast.SubroutineExpr(
                        ast.BinNumExpr(
                            ast.BinNumExpr(
                                ast.NumVal(-0.0),
                                ast.SubroutineExpr(
                                    ast.IfExpr(
                                        ast.CompExpr(
                                            ast.FeatureRef(20),
                                            ast.NumVal(16.7950001),
                                            ast.CompOpType.GTE),
                                        ast.NumVal(-0.173057005),
                                        ast.NumVal(0.163440868))),
                                ast.BinNumOpType.ADD),
                            ast.SubroutineExpr(
                                ast.IfExpr(
                                    ast.CompExpr(
                                        ast.FeatureRef(27),
                                        ast.NumVal(0.142349988),
                                        ast.CompOpType.GTE),
                                    ast.NumVal(-0.161026895),
                                    ast.NumVal(0.149405137))),
                            ast.BinNumOpType.ADD)),
                    ast.BinNumOpType.SUB)),
            ast.BinNumOpType.ADD),
        ast.BinNumOpType.DIV,
        to_reuse=True)

    expected = ast.VectorVal([
        ast.BinNumExpr(ast.NumVal(1), sigmoid, ast.BinNumOpType.SUB),
        sigmoid
    ])

    assert utils.cmp_exprs(actual, expected)
def test_log1p_expr():
    """VisualBasicInterpreter must emit a self-contained Log1p function (with
    its ChebyshevBroucke helper) alongside the Score entry point.
    """
    expr = ast.Log1pExpr(ast.NumVal(2.0))

    # Expected code kept byte-for-byte; assert_code_equal compares it against
    # the interpreter output.
    expected_code = """ Module Model Function Score(ByRef inputVector() As Double) As Double Score = Log1p(2.0) End Function Function ChebyshevBroucke(ByVal x As Double, _ ByRef coeffs() As Double) As Double Dim b2 as Double Dim b1 as Double Dim b0 as Double Dim x2 as Double b2 = 0.0 b1 = 0.0 b0 = 0.0 x2 = x * 2 Dim i as Integer For i = UBound(coeffs) - 1 To 0 Step -1 b2 = b1 b1 = b0 b0 = x2 * b1 - b2 + coeffs(i) Next i ChebyshevBroucke = (b0 - b2) * 0.5 End Function Function Log1p(ByVal x As Double) As Double If x = 0.0 Then Log1p = 0.0 Exit Function End If If x = -1.0 Then On Error Resume Next Log1p = -1.0 / 0.0 Exit Function End If If x < -1.0 Then On Error Resume Next Log1p = 0.0 / 0.0 Exit Function End If Dim xAbs As Double xAbs = Math.Abs(x) If xAbs < 0.5 * 4.94065645841247e-324 Then Log1p = x Exit Function End If If (x > 0.0 AND x < 1e-8) OR (x > -1e-9 AND x < 0.0) Then Log1p = x * (1.0 - x * 0.5) Exit Function End If If xAbs < 0.375 Then Dim coeffs(22) As Double coeffs(0) = 0.10378693562743769800686267719098e+1 coeffs(1) = -0.13364301504908918098766041553133e+0 coeffs(2) = 0.19408249135520563357926199374750e-1 coeffs(3) = -0.30107551127535777690376537776592e-2 coeffs(4) = 0.48694614797154850090456366509137e-3 coeffs(5) = -0.81054881893175356066809943008622e-4 coeffs(6) = 0.13778847799559524782938251496059e-4 coeffs(7) = -0.23802210894358970251369992914935e-5 coeffs(8) = 0.41640416213865183476391859901989e-6 coeffs(9) = -0.73595828378075994984266837031998e-7 coeffs(10) = 0.13117611876241674949152294345011e-7 coeffs(11) = -0.23546709317742425136696092330175e-8 coeffs(12) = 0.42522773276034997775638052962567e-9 coeffs(13) = -0.77190894134840796826108107493300e-10 coeffs(14) = 0.14075746481359069909215356472191e-10 coeffs(15) = -0.25769072058024680627537078627584e-11 coeffs(16) = 0.47342406666294421849154395005938e-12 coeffs(17) = -0.87249012674742641745301263292675e-13 coeffs(18) = 
0.16124614902740551465739833119115e-13 coeffs(19) = -0.29875652015665773006710792416815e-14 coeffs(20) = 0.55480701209082887983041321697279e-15 coeffs(21) = -0.10324619158271569595141333961932e-15 Log1p = x * (1.0 - x * ChebyshevBroucke(x / 0.375, coeffs)) Exit Function End If Log1p = Math.log(1.0 + x) End Function End Module """

    interpreter = VisualBasicInterpreter()
    utils.assert_code_equal(interpreter.interpret(expr), expected_code)
def test_statsmodels_wo_const():
    """GLS without an intercept must assemble into a weighted feature sum
    seeded with 0.0 (variant with shorter-precision expected coefficients).
    """
    estimator = utils.StatsmodelsSklearnLikeWrapper(sm.GLS, {})
    _, __, estimator = utils.get_regression_model_trainer()(estimator)

    assembler = assemblers.StatsmodelsModelAssemblerSelector(estimator)
    actual = assembler.assemble()

    # Learned coefficient for each of the 13 input features.
    feature_weight_mul = [
        ast.BinNumExpr(ast.FeatureRef(0), ast.NumVal(-0.0926871267), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(1), ast.NumVal(0.0482139967), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(2), ast.NumVal(-0.0075524567), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(3), ast.NumVal(2.9965313383), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(4), ast.NumVal(-3.0877925575), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(5), ast.NumVal(5.9546630146), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(6), ast.NumVal(-0.0073548271), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(7), ast.NumVal(-0.9828206079), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(8), ast.NumVal(0.1727389546), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(9), ast.NumVal(-0.0094218658), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(10), ast.NumVal(-0.3931071261), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(11), ast.NumVal(0.0149656744), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(12), ast.NumVal(-0.4133835832), ast.BinNumOpType.MUL),
    ]

    expected = assemblers.utils.apply_op_to_expressions(
        ast.BinNumOpType.ADD,
        ast.NumVal(0.0),
        *feature_weight_mul)

    assert utils.cmp_exprs(actual, expected)
def test_binary_classification():
    """LightGBM binary classifier: two stumps assemble into
    sigmoid(margin) = 1 / (1 + exp(0 - margin)); the output vector is
    [1 - sigmoid, sigmoid]. LightGBM splits use strict GT comparisons.
    """
    estimator = lightgbm.LGBMClassifier(n_estimators=2, random_state=1,
                                        max_depth=1)
    utils.train_model_classification_binary(estimator)

    assembler = assemblers.LightGBMModelAssembler(estimator)
    actual = assembler.assemble()

    # to_reuse=True: the sigmoid appears twice in the output vector.
    sigmoid = ast.BinNumExpr(
        ast.NumVal(1),
        ast.BinNumExpr(
            ast.NumVal(1),
            ast.ExpExpr(
                ast.BinNumExpr(
                    ast.NumVal(0),
                    ast.SubroutineExpr(
                        ast.BinNumExpr(
                            ast.BinNumExpr(
                                ast.NumVal(0),
                                ast.IfExpr(
                                    ast.CompExpr(
                                        ast.FeatureRef(23),
                                        ast.NumVal(868.2000000000002),
                                        ast.CompOpType.GT),
                                    ast.NumVal(0.2762557140263451),
                                    ast.NumVal(0.6399134166614473)),
                                ast.BinNumOpType.ADD),
                            ast.IfExpr(
                                ast.CompExpr(
                                    ast.FeatureRef(27),
                                    ast.NumVal(0.14205000000000004),
                                    ast.CompOpType.GT),
                                ast.NumVal(-0.2139321843285849),
                                ast.NumVal(0.1151466338793227)),
                            ast.BinNumOpType.ADD)),
                    ast.BinNumOpType.SUB)),
            ast.BinNumOpType.ADD),
        ast.BinNumOpType.DIV,
        to_reuse=True)

    expected = ast.VectorVal([
        ast.BinNumExpr(ast.NumVal(1), sigmoid, ast.BinNumOpType.SUB),
        sigmoid])

    assert utils.cmp_exprs(actual, expected)
def test_statsmodels_w_const():
    """GLS with fit_intercept=True must assemble into the intercept
    (37.1353468527) plus the weighted feature sum.

    NOTE(review): another test with this name appears later in this SOURCE —
    confirm the two belong to separate test modules.
    """
    estimator = utils.StatsmodelsSklearnLikeWrapper(
        sm.GLS, dict(init=dict(fit_intercept=True)))
    _, __, estimator = utils.get_regression_model_trainer()(estimator)

    assembler = assemblers.StatsmodelsModelAssemblerSelector(estimator)
    actual = assembler.assemble()

    # Learned coefficient for each of the 13 input features.
    feature_weight_mul = [
        ast.BinNumExpr(ast.FeatureRef(0), ast.NumVal(-0.1085910250), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(1), ast.NumVal(0.0441988987), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(2), ast.NumVal(0.0174669054), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(3), ast.NumVal(2.8323210870), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(4), ast.NumVal(-18.4837486980), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(5), ast.NumVal(3.8354955484), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(6), ast.NumVal(0.0001409165), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(7), ast.NumVal(-1.5040340047), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(8), ast.NumVal(0.3106174852), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(9), ast.NumVal(-0.0123066500), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(10), ast.NumVal(-0.9736183985), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(11), ast.NumVal(0.0094039648), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(12), ast.NumVal(-0.5203427347), ast.BinNumOpType.MUL),
    ]

    expected = assemblers.utils.apply_op_to_expressions(
        ast.BinNumOpType.ADD,
        ast.NumVal(37.1353468527),
        *feature_weight_mul)

    assert utils.cmp_exprs(actual, expected)
def test_statsmodels_w_const():
    """GLS with fit_intercept=True must assemble into the intercept
    (36.36708074657767) plus the weighted feature sum (full-precision
    expected-coefficient variant).
    """
    estimator = utils.StatsmodelsSklearnLikeWrapper(
        sm.GLS, dict(init=dict(fit_intercept=True)))
    _, __, estimator = utils.get_regression_model_trainer()(estimator)

    assembler = assemblers.StatsmodelsModelAssemblerSelector(estimator)
    actual = assembler.assemble()

    # Learned coefficient for each of the 13 input features.
    feature_weight_mul = [
        ast.BinNumExpr(ast.FeatureRef(0), ast.NumVal(-0.1086131135490779), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(1), ast.NumVal(0.046461486329934965), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(2), ast.NumVal(0.027432259970185422), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(3), ast.NumVal(2.6160671309537693), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(4), ast.NumVal(-17.51793656329748), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(5), ast.NumVal(3.7674418196771957), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(6), ast.NumVal(-2.1581753172923886e-05), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(7), ast.NumVal(-1.4711768622633619), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(8), ast.NumVal(0.29567671400629103), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(9), ast.NumVal(-0.012233831527258853), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(10), ast.NumVal(-0.9220356453705244), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(11), ast.NumVal(0.009038220462695548), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(12), ast.NumVal(-0.5425830337142312), ast.BinNumOpType.MUL),
    ]

    expected = assemblers.utils.apply_op_to_expressions(
        ast.BinNumOpType.ADD,
        ast.NumVal(36.36708074657767),
        *feature_weight_mul)

    assert utils.cmp_exprs(actual, expected)
def test_statsmodels_processmle():
    """ProcessMLE regression must assemble into a weighted feature sum with a
    0.0 intercept seed (shorter-precision expected-coefficient variant).

    The init kwargs build the exogenous scale/smooth/noise matrices and the
    time/groups vectors ProcessMLE requires; maxiter=1 keeps the fit fast and
    deterministic for the hard-coded coefficients below.
    """
    estimator = utils.StatsmodelsSklearnLikeWrapper(
        ProcessMLE,
        dict(init=dict(
            exog_scale=np.ones(
                (len(utils.get_regression_model_trainer().y_train), 2)),
            exog_smooth=np.ones(
                (len(utils.get_regression_model_trainer().y_train), 2)),
            exog_noise=np.ones(
                (len(utils.get_regression_model_trainer().y_train), 2)),
            time=np.kron(
                np.ones(
                    len(utils.get_regression_model_trainer().y_train) // 3),
                np.arange(3)),
            groups=np.kron(
                np.arange(
                    len(utils.get_regression_model_trainer().y_train) // 3),
                np.ones(3))),
            fit=dict(maxiter=1)))
    _, __, estimator = utils.get_regression_model_trainer()(estimator)

    assembler = assemblers.ProcessMLEModelAssembler(estimator)
    actual = assembler.assemble()

    # Learned coefficient for each of the 13 input features.
    feature_weight_mul = [
        ast.BinNumExpr(ast.FeatureRef(0), ast.NumVal(-0.0932673973), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(1), ast.NumVal(0.0480819091), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(2), ast.NumVal(-0.0063734439), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(3), ast.NumVal(2.7510656855), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(4), ast.NumVal(-3.0836268637), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(5), ast.NumVal(5.9605290000), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(6), ast.NumVal(-0.0077880716), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(7), ast.NumVal(-0.9685365627), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(8), ast.NumVal(0.1688777882), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(9), ast.NumVal(-0.0092446419), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(10), ast.NumVal(-0.3924930042), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(11), ast.NumVal(0.01506511708295605), ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(12), ast.NumVal(-0.4177000096), ast.BinNumOpType.MUL),
    ]

    expected = assemblers.utils.apply_op_to_expressions(
        ast.BinNumOpType.ADD,
        ast.NumVal(0.0),
        *feature_weight_mul)

    assert utils.cmp_exprs(actual, expected)
# NOTE(review): a second `def test_lightning_regression` appears later in this
# module and rebinds the name, so this definition is dead — pytest never
# collects it. Presumably it was kept from an older lightning version with
# different fitted weights; confirm and delete or rename one of the two.
def test_lightning_regression():
    estimator = AdaGradRegressor(random_state=1)
    utils.get_regression_model_trainer()(estimator)

    assembler = assemblers.SklearnLinearModelAssembler(estimator)
    actual = assembler.assemble()

    # One weighted-feature term per input feature.
    feature_weight_mul = [
        ast.BinNumExpr(ast.FeatureRef(0), ast.NumVal(-0.08558826944690746),
                       ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(1), ast.NumVal(0.0803724696787377),
                       ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(2), ast.NumVal(-0.03516743076774846),
                       ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(3), ast.NumVal(0.26469178947134087),
                       ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(4), ast.NumVal(0.15651985221012488),
                       ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(5), ast.NumVal(1.5186399078028587),
                       ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(6), ast.NumVal(0.10089874009193693),
                       ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(7), ast.NumVal(-0.011426237067026246),
                       ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(8), ast.NumVal(0.0861987777487293),
                       ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(9), ast.NumVal(-0.0057791506839322574),
                       ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(10), ast.NumVal(0.3357752757550913),
                       ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(11), ast.NumVal(0.020189965076849486),
                       ast.BinNumOpType.MUL),
        ast.BinNumExpr(ast.FeatureRef(12), ast.NumVal(-0.7390647599317609),
                       ast.BinNumOpType.MUL),
    ]

    # No intercept term for this model, hence the leading 0.0.
    expected = assemblers.utils.apply_op_to_expressions(
        ast.BinNumOpType.ADD,
        ast.NumVal(0.0),
        *feature_weight_mul)

    assert utils.cmp_exprs(actual, expected)
def test_lightning_regression():
    """AdaGradRegressor: assembled AST matches the fitted weights."""
    estimator = AdaGradRegressor(random_state=1)
    utils.get_regression_model_trainer()(estimator)

    assembler = assemblers.SklearnLinearModelAssembler(estimator)
    actual = assembler.assemble()

    # Expected fitted weight for each feature, in feature order.
    weights = [
        -0.0961163452,
        0.1574398180,
        -0.0251799219,
        0.1975142192,
        0.1189621635,
        1.2977018274,
        0.1192977978,
        0.0331955333,
        0.1433964513,
        0.0014943531,
        0.3116036672,
        0.0258421936,
        -0.7386996349,
    ]
    feature_weight_mul = [
        ast.BinNumExpr(ast.FeatureRef(idx), ast.NumVal(weight),
                       ast.BinNumOpType.MUL)
        for idx, weight in enumerate(weights)
    ]

    # No intercept term for this model, hence the leading 0.0.
    expected = assemblers.utils.apply_op_to_expressions(
        ast.BinNumOpType.ADD,
        ast.NumVal(0.0),
        *feature_weight_mul)

    assert utils.cmp_exprs(actual, expected)
# NOTE(review): a second `def test_lightning_multi_class` appears later in
# this module and rebinds the name, so this definition is dead — pytest never
# collects it. Presumably it was kept from an older lightning version with
# different fitted weights; confirm and delete or rename one of the two.
def test_lightning_multi_class():
    estimator = AdaGradClassifier(random_state=1)
    utils.get_classification_model_trainer()(estimator)

    assembler = assemblers.SklearnLinearModelAssembler(estimator)
    actual = assembler.assemble()

    # One left-folded weighted sum per class, starting from 0.0 (no bias).
    expected = ast.VectorVal([
        ast.BinNumExpr(
            ast.BinNumExpr(
                ast.BinNumExpr(
                    ast.BinNumExpr(
                        ast.NumVal(0.0),
                        ast.BinNumExpr(ast.FeatureRef(0),
                                       ast.NumVal(0.09686334892116512),
                                       ast.BinNumOpType.MUL),
                        ast.BinNumOpType.ADD),
                    ast.BinNumExpr(ast.FeatureRef(1),
                                   ast.NumVal(0.32572202133211947),
                                   ast.BinNumOpType.MUL),
                    ast.BinNumOpType.ADD),
                ast.BinNumExpr(ast.FeatureRef(2),
                               ast.NumVal(-0.48444233646554424),
                               ast.BinNumOpType.MUL),
                ast.BinNumOpType.ADD),
            ast.BinNumExpr(ast.FeatureRef(3),
                           ast.NumVal(-0.219719145605816),
                           ast.BinNumOpType.MUL),
            ast.BinNumOpType.ADD),
        ast.BinNumExpr(
            ast.BinNumExpr(
                ast.BinNumExpr(
                    ast.BinNumExpr(
                        ast.NumVal(0.0),
                        ast.BinNumExpr(ast.FeatureRef(0),
                                       ast.NumVal(-0.1089136473832088),
                                       ast.BinNumOpType.MUL),
                        ast.BinNumOpType.ADD),
                    ast.BinNumExpr(ast.FeatureRef(1),
                                   ast.NumVal(-0.16956003333433572),
                                   ast.BinNumOpType.MUL),
                    ast.BinNumOpType.ADD),
                ast.BinNumExpr(ast.FeatureRef(2),
                               ast.NumVal(0.0365531256007199),
                               ast.BinNumOpType.MUL),
                ast.BinNumOpType.ADD),
            ast.BinNumExpr(ast.FeatureRef(3),
                           ast.NumVal(-0.01016100116780896),
                           ast.BinNumOpType.MUL),
            ast.BinNumOpType.ADD),
        ast.BinNumExpr(
            ast.BinNumExpr(
                ast.BinNumExpr(
                    ast.BinNumExpr(
                        ast.NumVal(0.0),
                        ast.BinNumExpr(ast.FeatureRef(0),
                                       ast.NumVal(-0.16690339219780817),
                                       ast.BinNumOpType.MUL),
                        ast.BinNumOpType.ADD),
                    ast.BinNumExpr(ast.FeatureRef(1),
                                   ast.NumVal(-0.19466284646233858),
                                   ast.BinNumOpType.MUL),
                    ast.BinNumOpType.ADD),
                ast.BinNumExpr(ast.FeatureRef(2),
                               ast.NumVal(0.2953585236360389),
                               ast.BinNumOpType.MUL),
                ast.BinNumOpType.ADD),
            ast.BinNumExpr(ast.FeatureRef(3),
                           ast.NumVal(0.21288203082531384),
                           ast.BinNumOpType.MUL),
            ast.BinNumOpType.ADD)
    ])

    assert utils.cmp_exprs(actual, expected)
def test_lightning_binary_class():
    """AdaGradClassifier (binary): assembled AST matches the fitted weights."""
    estimator = AdaGradClassifier(random_state=1)
    utils.get_binary_classification_model_trainer()(estimator)

    assembler = assemblers.SklearnLinearModelAssembler(estimator)
    actual = assembler.assemble()

    # Expected fitted weight for each of the 30 features, in feature order.
    weights = [
        0.1617602138,
        0.0931034793,
        0.6279180888,
        0.1856722189,
        0.0009999878,
        -0.0028974470,
        -0.0059948515,
        -0.0024173728,
        0.0020429247,
        0.0009604400,
        0.0010933747,
        0.0078588761,
        -0.0069150246,
        -0.2583249885,
        0.0000097479,
        -0.0007210600,
        -0.0011295195,
        -0.0001966115,
        0.0001358314,
        -0.0000378118,
        0.1555921773,
        0.0621307817,
        0.5138354949,
        -0.2418579612,
        0.0007953821,
        -0.0110760214,
        -0.0162178044,
        -0.0040277699,
        0.0015067033,
        0.0001536614,
    ]
    feature_weight_mul = [
        ast.BinNumExpr(ast.FeatureRef(idx), ast.NumVal(weight),
                       ast.BinNumOpType.MUL)
        for idx, weight in enumerate(weights)
    ]

    # No intercept term for this model, hence the leading 0.0.
    expected = assemblers.utils.apply_op_to_expressions(
        ast.BinNumOpType.ADD,
        ast.NumVal(0.0),
        *feature_weight_mul)

    assert utils.cmp_exprs(actual, expected)
def test_count_all_exprs_types():
    """count_exprs visits every node of a tree mixing all expression kinds."""
    # Element-wise vector of one instance of each scalar expression type.
    unary_vector = ast.VectorVal([
        ast.ExpExpr(ast.NumVal(2)),
        ast.SqrtExpr(ast.NumVal(2)),
        ast.PowExpr(ast.NumVal(2), ast.NumVal(3)),
        ast.TanhExpr(ast.NumVal(1)),
        ast.BinNumExpr(ast.NumVal(0), ast.FeatureRef(0),
                       ast.BinNumOpType.ADD)
    ])
    const_vector = ast.VectorVal([
        ast.NumVal(1),
        ast.NumVal(2),
        ast.NumVal(3),
        ast.NumVal(4),
        ast.FeatureRef(1)
    ])
    vector_diff = ast.BinVectorExpr(unary_vector, const_vector,
                                    ast.BinNumOpType.SUB)

    # A scalar branch so the top node is a vector-by-number operation.
    scalar_branch = ast.IfExpr(
        ast.CompExpr(ast.NumVal(2), ast.NumVal(0), ast.CompOpType.GT),
        ast.NumVal(3),
        ast.NumVal(4),
    )

    expr = ast.BinVectorNumExpr(vector_diff, scalar_branch,
                                ast.BinNumOpType.MUL)

    assert ast.count_exprs(expr) == 27
def test_lightning_multi_class():
    """AdaGradClassifier (3 classes): one weighted-sum expression per class."""
    estimator = AdaGradClassifier(random_state=1)
    utils.get_classification_model_trainer()(estimator)

    assembler = assemblers.SklearnLinearModelAssembler(estimator)
    actual = assembler.assemble()

    def class_score(weights):
        # Left-fold the weighted features onto a 0.0 seed (no bias term),
        # mirroring the nesting the assembler produces.
        acc = ast.NumVal(0.0)
        for idx, weight in enumerate(weights):
            acc = ast.BinNumExpr(
                acc,
                ast.BinNumExpr(ast.FeatureRef(idx), ast.NumVal(weight),
                               ast.BinNumOpType.MUL),
                ast.BinNumOpType.ADD)
        return acc

    # Expected fitted weights per class, in feature order.
    expected = ast.VectorVal([
        class_score([0.0935146297, 0.3213921354,
                     -0.4855914264, -0.2214295302]),
        class_score([-0.1103262586, -0.1662457692,
                     0.0379823341, -0.0128634938]),
        class_score([-0.1685751402, -0.2045901693,
                     0.2932121798, 0.2138148665]),
    ])

    assert utils.cmp_exprs(actual, expected)
def abs(expr):
    """Build an AST expression computing the absolute value of *expr*.

    NOTE: intentionally shadows the builtin ``abs`` inside this module;
    callers rely on the name, so it is kept as-is.
    """
    # Wrap the operand so it is evaluated once and reused in both branches.
    operand = ast.IdExpr(expr, to_reuse=True)
    negated = utils.sub(ast.NumVal(0.0), operand)
    return ast.IfExpr(
        utils.lt(operand, ast.NumVal(0.0)),
        negated,
        operand)
def _assemble_cond(self, node_id):
    """Build the split condition `feature <= threshold` for a tree node."""
    split_feature = self._tree.feature[node_id]
    split_threshold = ast.NumVal(self._tree.threshold[node_id])
    return lte(ast.FeatureRef(split_feature), split_threshold)