def test_regression():
    """Two depth-1 XGBoost trees assemble into (base_score + tree1) + tree2."""
    base_score = 0.6
    estimator = xgboost.XGBRegressor(n_estimators=2, random_state=1,
                                     max_depth=1, base_score=base_score)
    utils.train_model_regression(estimator)

    actual = assemblers.XGBoostModelAssembler(estimator).assemble()

    # Each boosted tree is a single split expressed as an IfExpr.
    first_tree = ast.IfExpr(
        ast.CompExpr(
            ast.FeatureRef(12),
            ast.NumVal(9.72500038),
            ast.CompOpType.GTE),
        ast.NumVal(1.67318344),
        ast.NumVal(2.92757893))
    second_tree = ast.IfExpr(
        ast.CompExpr(
            ast.FeatureRef(5),
            ast.NumVal(6.94099998),
            ast.CompOpType.GTE),
        ast.NumVal(3.3400948),
        ast.NumVal(1.72118247))
    expected = ast.SubroutineExpr(
        ast.BinNumExpr(
            ast.BinNumExpr(
                ast.NumVal(base_score),
                first_tree,
                ast.BinNumOpType.ADD),
            second_tree,
            ast.BinNumOpType.ADD))

    assert utils.cmp_exprs(actual, expected)
def test_regression_best_ntree_limit():
    """best_ntree_limit=2 restricts assembly to the first two of three trees."""
    base_score = 0.6
    estimator = xgboost.XGBRegressor(n_estimators=3, random_state=1,
                                     max_depth=1, base_score=base_score)
    # The third tree must be ignored by the assembler.
    estimator.best_ntree_limit = 2
    utils.train_model_regression(estimator)

    actual = assemblers.XGBoostModelAssembler(estimator).assemble()

    tree_one = ast.SubroutineExpr(
        ast.IfExpr(
            ast.CompExpr(ast.FeatureRef(12),
                         ast.NumVal(9.72500038),
                         ast.CompOpType.GTE),
            ast.NumVal(1.6614188),
            ast.NumVal(2.91697121)))
    tree_two = ast.SubroutineExpr(
        ast.IfExpr(
            ast.CompExpr(ast.FeatureRef(5),
                         ast.NumVal(6.94099998),
                         ast.CompOpType.GTE),
            ast.NumVal(3.33810854),
            ast.NumVal(1.71813202)))
    expected = ast.SubroutineExpr(
        ast.BinNumExpr(
            ast.BinNumExpr(
                ast.NumVal(base_score),
                tree_one,
                ast.BinNumOpType.ADD),
            tree_two,
            ast.BinNumOpType.ADD))

    assert utils.cmp_exprs(actual, expected)
def test_regression_saved_without_feature_names():
    """A model round-tripped through save_model/load_model assembles the same AST."""
    base_score = 0.6
    estimator = xgboost.XGBRegressor(n_estimators=2, random_state=1,
                                     max_depth=1, base_score=base_score)
    utils.train_model_regression(estimator)

    # Persist and reload into a fresh estimator; feature names are lost
    # on disk, so the assembler must cope without them.
    with utils.tmp_dir() as tmp_dirpath:
        filename = os.path.join(tmp_dirpath, "tmp.file")
        estimator.save_model(filename)
        estimator = xgboost.XGBRegressor(base_score=base_score)
        estimator.load_model(filename)

    actual = assemblers.XGBoostModelAssembler(estimator).assemble()

    tree_one = ast.SubroutineExpr(
        ast.IfExpr(
            ast.CompExpr(ast.FeatureRef(12),
                         ast.NumVal(9.72500038),
                         ast.CompOpType.GTE),
            ast.NumVal(1.6614188),
            ast.NumVal(2.91697121)))
    tree_two = ast.SubroutineExpr(
        ast.IfExpr(
            ast.CompExpr(ast.FeatureRef(5),
                         ast.NumVal(6.94099998),
                         ast.CompOpType.GTE),
            ast.NumVal(3.33810854),
            ast.NumVal(1.71813202)))
    expected = ast.SubroutineExpr(
        ast.BinNumExpr(
            ast.BinNumExpr(
                ast.NumVal(base_score),
                tree_one,
                ast.BinNumOpType.ADD),
            tree_two,
            ast.BinNumOpType.ADD))

    assert utils.cmp_exprs(actual, expected)
def test_regression():
    """Two depth-1 LightGBM trees assemble into (0 + tree1) + tree2."""
    estimator = lightgbm.LGBMRegressor(n_estimators=2, random_state=1,
                                       max_depth=1)
    utils.train_model_regression(estimator)

    actual = assemblers.LightGBMModelAssembler(estimator).assemble()

    tree_one = ast.IfExpr(
        ast.CompExpr(
            ast.FeatureRef(5),
            ast.NumVal(6.8455),
            ast.CompOpType.GT),
        ast.NumVal(24.007392728914056),
        ast.NumVal(22.35695742616179))
    tree_two = ast.IfExpr(
        ast.CompExpr(
            ast.FeatureRef(12),
            ast.NumVal(9.63),
            ast.CompOpType.GT),
        ast.NumVal(-0.4903836928981587),
        ast.NumVal(0.7222498915097475))
    expected = ast.SubroutineExpr(
        ast.BinNumExpr(
            ast.BinNumExpr(
                ast.NumVal(0),
                tree_one,
                ast.BinNumOpType.ADD),
            tree_two,
            ast.BinNumOpType.ADD))

    assert utils.cmp_exprs(actual, expected)
def test_regression_random_forest():
    """XGBRFRegressor assembles (via the selector) into base_score + two trees.

    Fix: the expected AST previously hard-coded the literal ``0.6`` while
    every sibling test builds it from the ``base_score`` variable; use the
    variable so the expected tree cannot drift from the model configuration.
    The value is unchanged.
    """
    base_score = 0.6
    estimator = xgboost.XGBRFRegressor(n_estimators=2, random_state=1,
                                       max_depth=1, base_score=base_score)
    utils.train_model_regression(estimator)

    assembler = assemblers.XGBoostModelAssemblerSelector(estimator)
    actual = assembler.assemble()

    expected = ast.SubroutineExpr(
        ast.BinNumExpr(
            ast.BinNumExpr(
                ast.NumVal(base_score),  # was a duplicated literal 0.6
                ast.SubroutineExpr(
                    ast.IfExpr(
                        ast.CompExpr(ast.FeatureRef(5),
                                     ast.NumVal(6.8375001),
                                     ast.CompOpType.GTE),
                        ast.NumVal(17.3671646),
                        ast.NumVal(9.48354053))),
                ast.BinNumOpType.ADD),
            ast.SubroutineExpr(
                ast.IfExpr(
                    ast.CompExpr(ast.FeatureRef(12),
                                 ast.NumVal(9.72500038),
                                 ast.CompOpType.GTE),
                    ast.NumVal(8.31587982),
                    ast.NumVal(14.7766275))),
            ast.BinNumOpType.ADD))

    assert utils.cmp_exprs(actual, expected)
def test_regression():
    """Two depth-1 LightGBM trees (wrapped in subroutines) assemble into a sum."""
    estimator = lightgbm.LGBMRegressor(n_estimators=2, random_state=1,
                                       max_depth=1)
    utils.train_model_regression(estimator)

    actual = assemblers.LightGBMModelAssembler(estimator).assemble()

    tree_one = ast.SubroutineExpr(
        ast.IfExpr(
            ast.CompExpr(ast.FeatureRef(5),
                         ast.NumVal(6.918),
                         ast.CompOpType.GT),
            ast.NumVal(24.011454621684155),
            ast.NumVal(22.289277544391084)))
    tree_two = ast.SubroutineExpr(
        ast.IfExpr(
            ast.CompExpr(ast.FeatureRef(12),
                         ast.NumVal(9.63),
                         ast.CompOpType.GT),
            ast.NumVal(-0.49461212269771115),
            ast.NumVal(0.7174324413014594)))
    expected = ast.SubroutineExpr(
        ast.BinNumExpr(
            ast.BinNumExpr(
                ast.NumVal(0),
                tree_one,
                ast.BinNumOpType.ADD),
            tree_two,
            ast.BinNumOpType.ADD))

    assert utils.cmp_exprs(actual, expected)
def test_regression_random_forest():
    """LightGBM random forest averages its trees: ((0 + t1) + t2) * 0.5."""
    estimator = lightgbm.LGBMRegressor(boosting_type="rf", n_estimators=2,
                                       random_state=1, max_depth=1,
                                       subsample=0.7, subsample_freq=1)
    utils.train_model_regression(estimator)

    actual = assemblers.LightGBMModelAssembler(estimator).assemble()

    tree_one = ast.SubroutineExpr(
        ast.IfExpr(
            ast.CompExpr(ast.FeatureRef(5),
                         ast.NumVal(6.954000000000001),
                         ast.CompOpType.GT),
            ast.NumVal(37.24347877367631),
            ast.NumVal(19.936999995530854)))
    tree_two = ast.SubroutineExpr(
        ast.IfExpr(
            ast.CompExpr(ast.FeatureRef(5),
                         ast.NumVal(6.971500000000001),
                         ast.CompOpType.GT),
            ast.NumVal(38.48600037864964),
            ast.NumVal(20.183783757300255)))
    tree_sum = ast.BinNumExpr(
        ast.BinNumExpr(
            ast.NumVal(0),
            tree_one,
            ast.BinNumOpType.ADD),
        tree_two,
        ast.BinNumOpType.ADD)
    # Random forest output is the mean of the trees, hence the * 0.5.
    expected = ast.SubroutineExpr(
        ast.BinNumExpr(
            tree_sum,
            ast.NumVal(0.5),
            ast.BinNumOpType.MUL))

    assert utils.cmp_exprs(actual, expected)
def _get_pickled_trained_model():
    """Train a LinearRegression and return it pickled in a rewound BytesIO."""
    estimator = linear_model.LinearRegression()
    utils.train_model_regression(estimator)
    # BytesIO(initial_bytes) starts positioned at 0, so no explicit seek
    # is needed before the caller reads it back.
    return io.BytesIO(pickle.dumps(estimator))
def _prepare_pickled_model(tmp_path):
    """Train a LinearRegression, pickle it under tmp_path, and return the path."""
    estimator = linear_model.LinearRegression()
    utils.train_model_regression(estimator)
    model_path = tmp_path / "model.pickle"
    model_path.write_bytes(pickle.dumps(estimator))
    return model_path
def test_linear_model():
    """A gblinear booster assembles into 0.5 + (bias + sum of weight * feature)."""
    estimator = xgboost.XGBRegressor(n_estimators=2, random_state=1,
                                     feature_selector="shuffle",
                                     booster="gblinear")
    utils.train_model_regression(estimator)

    actual = assemblers.XGBoostModelAssemblerSelector(estimator).assemble()

    # Per-feature coefficients, indexed by feature position.
    weights = [
        -0.00999326, 0.0520094, 0.10447, 0.17387, 0.691745, 0.296357,
        0.0288206, 0.417822, 0.0551116, 0.00242449, 0.109585, 0.00744202,
        0.0731089,
    ]
    feature_weight_mul = [
        ast.BinNumExpr(ast.FeatureRef(index),
                       ast.NumVal(weight),
                       ast.BinNumOpType.MUL)
        for index, weight in enumerate(weights)
    ]
    expected = ast.SubroutineExpr(
        ast.BinNumExpr(
            ast.NumVal(0.5),
            assemblers.utils.apply_op_to_expressions(
                ast.BinNumOpType.ADD,
                ast.NumVal(3.13109),  # model bias term
                *feature_weight_mul),
            ast.BinNumOpType.ADD))

    assert utils.cmp_exprs(actual, expected)