def test_lm_cv_grid():
    """Run the three basic-flow helpers on a grid-search CV for "linear_model".

    The CV object is requested with task code "cl" and its ``param_grid`` is
    overridden with a single penalty ("l1") and three values of ``C``. Each
    helper receives the CV object together with the flag ``True``.
    """
    searcher = ml.get_default_cv("linear_model", "cl", search_type="grid")
    searcher.param_grid = dict(
        ml__penalty=["l1"],
        ml__C=[1e-5, 1e-3, 1e-1],
    )
    for run_flow in (
        _test_basic_flow_holdout_pandas1,
        _test_basic_flow_holdout_pandas2,
        _test_basic_flow_cv_pandas,
    ):
        run_flow(searcher, True)
def test_xgboost_cv_grid():
    """Run the three basic-flow helpers on a grid-search CV for "xgboost".

    The CV object is requested with task code "rg"; its ``param_grid`` pins
    ``colsample_bynode`` and ``learning_rate`` to one value each and varies
    ``ratio_min_child_weight`` over ``None``, 0.005 and 0.01. Each helper is
    called with the flag ``False``.
    """
    is_cl = False  # same flag value the other "rg" tests in this file pass
    searcher = ml.get_default_cv("xgboost", "rg", search_type="grid")
    searcher.param_grid = {
        "ml__colsample_bynode": [0.1],
        "ml__learning_rate": [0.01],
        "ml__ratio_min_child_weight": [None, 0.005, 0.01],
    }
    _test_basic_flow_holdout_pandas1(searcher, is_cl)
    _test_basic_flow_holdout_pandas2(searcher, is_cl)
    _test_basic_flow_cv_pandas(searcher, is_cl)
def test_random_holdout(setup):
    """Fit a default "linear_model"/"rg" CV on a holdout split and analyze it.

    Args:
        setup: A 4-item sequence unpacked as (training frame, validation
            frame, test frame, feature columns). The test frame is not used
            here.

    The validation frame is passed explicitly to ``fit_holdout_pandas``; the
    fitted model's ``estimator`` is wrapped in ``ml.CVAnalyzer`` and handed to
    ``_basic_flow``.
    """
    train_df, valid_df, _unused_test_df, features = setup
    target = utils.get_target_column(False)
    searcher = ml.get_default_cv("linear_model", "rg", "r2")
    fitted = searcher.fit_holdout_pandas(
        train_df,
        target,
        features,
        df_validation=valid_df,
    )
    _basic_flow(ml.CVAnalyzer(fitted.estimator))
def test_random_forest_cv_grid():
    """Run the three basic-flow helpers on a grid-search CV for "random_forest".

    The CV object is requested with task code "cl"; its ``param_grid`` pins
    ``max_depth``, ``n_estimators`` and ``max_features`` to one value each and
    varies ``min_samples_leaf`` over 0.01, 0.05 and 0.1. Each helper is called
    with the flag ``True``.
    """
    cv = ml.get_default_cv("random_forest", "cl", search_type="grid")
    cv.param_grid = {
        "ml__max_depth": [2],
        "ml__n_estimators": [100],
        # "sqrt" replaces the former "auto": scikit-learn deprecated "auto" in
        # 1.1 and removed it in 1.3, and for classifiers "auto" meant "sqrt",
        # so the searched configuration is unchanged.
        # NOTE(review): assumes the "ml" step is a scikit-learn forest -- confirm.
        "ml__max_features": ["sqrt"],
        "ml__min_samples_leaf": [0.01, 0.05, 0.1],
    }
    _test_basic_flow_holdout_pandas1(cv, True)
    _test_basic_flow_holdout_pandas2(cv, True)
    _test_basic_flow_cv_pandas(cv, True)
def test_lightgbm_cv_grid():
    """Run the three basic-flow helpers on a grid-search CV for "lightgbm".

    The CV object is requested with task code "cl"; its ``param_grid`` pins
    ``num_leaves``, ``colsample_bytree`` and ``learning_rate`` to one value
    each and varies ``min_child_samples`` over 0, 20 and 100. Each helper is
    called with the flag ``True``.
    """
    searcher = ml.get_default_cv("lightgbm", "cl", search_type="grid")
    searcher.param_grid = dict(
        ml__num_leaves=[10],
        ml__colsample_bytree=[0.1],
        ml__learning_rate=[0.01],
        ml__min_child_samples=[0, 20, 100],
    )
    for run_flow in (
        _test_basic_flow_holdout_pandas1,
        _test_basic_flow_holdout_pandas2,
        _test_basic_flow_cv_pandas,
    ):
        run_flow(searcher, True)
def test_grid_holdout(setup):
    """Fit a grid-search "linear_model"/"cl" CV on a holdout split and analyze it.

    Args:
        setup: A 4-item sequence unpacked as (training frame, validation
            frame, test frame, feature columns). Only the training frame and
            the feature columns are used; the holdout split is requested via
            ``ratio_training=0.8`` instead of an explicit validation frame.

    The fitted model's ``estimator`` is wrapped in ``ml.CVAnalyzer`` and handed
    to ``_basic_flow``.
    """
    df_training, _df_validation, _df_test, feature_columns = setup
    is_cl = True
    target_column = utils.get_target_column(is_cl)
    cv_obj = ml.get_default_cv("linear_model", "cl", search_type="grid")
    # The grid must be assigned to `param_grid`, the attribute every other
    # grid-search test in this file sets. The previous `parameter_grid`
    # spelling only created an unrelated attribute, so this custom grid was
    # never the one being searched.
    cv_obj.param_grid = {
        "ml__penalty": ["l1", "l2"],
        "ml__C": [1e-5, 1e-3, 1e-1],
    }
    model = cv_obj.fit_holdout_pandas(
        df_training, target_column, feature_columns, ratio_training=0.8
    )
    analyzer = ml.CVAnalyzer(model.estimator)
    _basic_flow(analyzer)
def test_random_forest_cv_random():
    """Run the three basic-flow helpers on a random-search CV for "random_forest".

    The CV object is requested with task code "rg" and its ``n_iter``
    attribute is set to 5 before each helper is called with the flag
    ``False``.
    """
    is_cl = False  # same flag value the other "rg" tests in this file pass
    searcher = ml.get_default_cv("random_forest", "rg", search_type="random")
    searcher.n_iter = 5
    _test_basic_flow_holdout_pandas1(searcher, is_cl)
    _test_basic_flow_holdout_pandas2(searcher, is_cl)
    _test_basic_flow_cv_pandas(searcher, is_cl)
def test_xgboost_cv_random():
    """Run the three basic-flow helpers on a random-search CV for "xgboost".

    The CV object is requested with task code "cl" and its ``n_iter``
    attribute is set to 5 before each helper is called with the flag ``True``.
    """
    searcher = ml.get_default_cv("xgboost", "cl", search_type="random")
    searcher.n_iter = 5
    for run_flow in (
        _test_basic_flow_holdout_pandas1,
        _test_basic_flow_holdout_pandas2,
        _test_basic_flow_cv_pandas,
    ):
        run_flow(searcher, True)
def test_lm_cv_random():
    """Run the three basic-flow helpers on a random-search CV for "linear_model".

    The CV object is requested with task code "cl" and its ``n_iter``
    attribute is set to 2 before each helper is called with the flag ``True``.
    """
    is_cl = True  # same flag value the other "cl" tests in this file pass
    random_search = ml.get_default_cv("linear_model", "cl", search_type="random")
    random_search.n_iter = 2
    _test_basic_flow_holdout_pandas1(random_search, is_cl)
    _test_basic_flow_holdout_pandas2(random_search, is_cl)
    _test_basic_flow_cv_pandas(random_search, is_cl)