def __init__(self, globals_dict: dict) -> None:
    logger = get_logger()
    np.random.seed(globals_dict["seed"])
    from sklearn.ensemble import AdaBoostClassifier

    args = {"random_state": globals_dict["seed"]}
    tune_args = {}
    tune_grid = {
        "n_estimators": np_list_arange(10, 300, 10, inclusive=True),
        "learning_rate": np_list_arange(0.001, 0.5, 0.001, inclusive=True),
        "algorithm": ["SAMME", "SAMME.R"],
    }
    tune_distributions = {
        "n_estimators": IntUniformDistribution(10, 300),
        "learning_rate": UniformDistribution(0.000001, 0.5, log=True),
    }

    leftover_parameters_to_categorical_distributions(tune_grid, tune_distributions)

    super().__init__(
        id="ada",
        name="Ada Boost Classifier",
        class_def=AdaBoostClassifier,
        args=args,
        tune_grid=tune_grid,
        tune_distribution=tune_distributions,
        tune_args=tune_args,
        shap=False,
    )

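# np_list_arange is used throughout these grids. A minimal sketch of its
# assumed behavior, for readers without the helper's source at hand: an
# np.arange that can include the endpoint and returns a rounded Python list.
# The name np_list_arange_sketch and the rounding rule are assumptions, not
# the actual implementation.
def np_list_arange_sketch(start, stop, step, inclusive=False):
    import numpy as np

    # extend the endpoint by one step so `stop` itself lands in the range
    stop_ = stop + (step if inclusive else 0)
    # round to suppress float drift such as 0.30000000000000004
    return [float(x) for x in np.round(np.arange(start, stop_, step), 10)]


assert np_list_arange_sketch(10, 300, 10, inclusive=True)[-1] == 300
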
def __init__(self, globals_dict: dict) -> None:
    logger = get_logger()
    np.random.seed(globals_dict["seed"])
    from sklearn.ensemble import BaggingClassifier

    args = {
        "random_state": globals_dict["seed"],
        "n_jobs": 1 if globals_dict["gpu_param"] else None,
    }
    tune_args = {}
    tune_grid = {
        "bootstrap": [True, False],
        "bootstrap_features": [True, False],
        "max_features": np_list_arange(0.4, 1, 0.1, inclusive=True),
        "max_samples": np_list_arange(0.4, 1, 0.1, inclusive=True),
    }
    tune_distributions = {
        "max_features": UniformDistribution(0.4, 1),
        "max_samples": UniformDistribution(0.4, 1),
    }

    leftover_parameters_to_categorical_distributions(tune_grid, tune_distributions)

    super().__init__(
        id="Bagging",
        name="Bagging Classifier",
        class_def=BaggingClassifier,
        args=args,
        tune_grid=tune_grid,
        tune_distribution=tune_distributions,
        tune_args=tune_args,
        shap=False,
        is_special=True,
        is_gpu_enabled=False,
    )

def __init__(self, globals_dict: dict) -> None:
    logger = get_logger()
    np.random.seed(globals_dict["seed"])
    from sklearn.ensemble import ExtraTreesClassifier

    args = {
        "random_state": globals_dict["seed"],
        "n_jobs": globals_dict["n_jobs_param"],
    }
    tune_args = {}
    tune_grid = {
        "n_estimators": np_list_arange(10, 300, 10, inclusive=True),
        "criterion": ["gini", "entropy"],
        "max_depth": np_list_arange(1, 11, 1, inclusive=True),
        "min_impurity_decrease": [
            0, 0.0001, 0.001, 0.01, 0.0002, 0.002, 0.02,
            0.0005, 0.005, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5,
        ],
        "max_features": [1.0, "sqrt", "log2"],
        "bootstrap": [True, False],
        "min_samples_split": [2, 5, 7, 9, 10],
        "min_samples_leaf": [2, 3, 4, 5, 6],
        "class_weight": ["balanced", "balanced_subsample", {}],
    }
    tune_distributions = {
        "n_estimators": IntUniformDistribution(10, 300),
        "max_depth": IntUniformDistribution(1, 11),
        "min_samples_split": IntUniformDistribution(2, 10),
        "min_samples_leaf": IntUniformDistribution(1, 5),
        "max_features": UniformDistribution(0.4, 1),
        "min_impurity_decrease": UniformDistribution(0.000000001, 0.5, log=True),
    }

    leftover_parameters_to_categorical_distributions(tune_grid, tune_distributions)

    super().__init__(
        id="et",
        name="Extra Trees Classifier",
        class_def=ExtraTreesClassifier,
        args=args,
        tune_grid=tune_grid,
        tune_distribution=tune_distributions,
        tune_args=tune_args,
        shap="type1",
    )

def __init__(self, globals_dict: dict) -> None:
    logger = get_logger()
    np.random.seed(globals_dict["seed"])
    from sklearn.ensemble import GradientBoostingClassifier

    args = {"random_state": globals_dict["seed"]}
    tune_args = {}
    tune_grid = {
        "n_estimators": np_list_arange(10, 300, 10, inclusive=True),
        "learning_rate": np_list_arange(0.001, 0.5, 0.001, inclusive=True),
        "subsample": np_list_arange(0.2, 1, 0.05, inclusive=True),
        "min_samples_split": [2, 4, 5, 7, 9, 10],
        "min_samples_leaf": [1, 2, 3, 4, 5],
        "max_depth": np_list_arange(1, 11, 1, inclusive=True),
        "min_impurity_decrease": [
            0, 0.0001, 0.001, 0.01, 0.0002, 0.002, 0.02,
            0.0005, 0.005, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5,
        ],
        "max_features": [1.0, "sqrt", "log2"],
    }
    tune_distributions = {
        "n_estimators": IntUniformDistribution(10, 300),
        "learning_rate": UniformDistribution(0.000001, 0.5, log=True),
        "subsample": UniformDistribution(0.2, 1),
        "min_samples_split": IntUniformDistribution(2, 10),
        "min_samples_leaf": IntUniformDistribution(1, 5),
        "max_depth": IntUniformDistribution(1, 11),
        "min_impurity_decrease": UniformDistribution(0.000000001, 0.5, log=True),
        "max_features": UniformDistribution(0.4, 1),
    }

    leftover_parameters_to_categorical_distributions(tune_grid, tune_distributions)

    super().__init__(
        id="gbc",
        name="Gradient Boosting Classifier",
        class_def=GradientBoostingClassifier,
        args=args,
        tune_grid=tune_grid,
        tune_distribution=tune_distributions,
        tune_args=tune_args,
        shap=False,
    )

def __init__(self, globals_dict: dict) -> None:
    logger = get_logger()
    np.random.seed(globals_dict["seed"])
    from catboost import CatBoostClassifier

    # suppress output
    logging.getLogger("catboost").setLevel(logging.ERROR)

    use_gpu = globals_dict["gpu_param"] == "force" or (
        globals_dict["gpu_param"] and len(globals_dict["X_train"]) >= 50000
    )

    args = {
        "random_state": globals_dict["seed"],
        "verbose": False,
        "thread_count": globals_dict["n_jobs_param"],
        "task_type": "GPU" if use_gpu else "CPU",
        "border_count": 32 if use_gpu else 254,
    }
    tune_args = {}
    tune_grid = {
        "depth": list(range(1, 12)),
        "n_estimators": np_list_arange(10, 300, 10, inclusive=True),
        "random_strength": np_list_arange(0, 0.8, 0.1, inclusive=True),
        "l2_leaf_reg": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 50, 100, 200],
    }
    tune_distributions = {
        "depth": IntUniformDistribution(1, 11),
        "n_estimators": IntUniformDistribution(10, 300),
        "random_strength": UniformDistribution(0, 0.8),
        "l2_leaf_reg": IntUniformDistribution(1, 200, log=True),
    }

    if use_gpu:
        tune_grid["depth"] = list(range(1, 9))
        tune_distributions["depth"] = IntUniformDistribution(1, 8)

    leftover_parameters_to_categorical_distributions(tune_grid, tune_distributions)

    super().__init__(
        id="catboost",
        name="CatBoost Classifier",
        class_def=CatBoostClassifier,
        args=args,
        tune_grid=tune_grid,
        tune_distribution=tune_distributions,
        tune_args=tune_args,
        shap="type2",
        is_gpu_enabled=use_gpu,
    )

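# A minimal standalone sketch of the GPU-selection rule used above (the
# function name and assertions are illustrative; the logic mirrors the
# use_gpu expression): "force" always selects the GPU, while a merely
# truthy gpu_param does so only once the training set reaches 50,000 rows.
def catboost_use_gpu(gpu_param, n_train_rows: int) -> bool:
    return gpu_param == "force" or (bool(gpu_param) and n_train_rows >= 50000)


assert catboost_use_gpu("force", 100)
assert catboost_use_gpu(True, 50000)
assert not catboost_use_gpu(True, 49999)
assert not catboost_use_gpu(False, 10**6)
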
def __init__(self, globals_dict: dict) -> None:
    logger = get_logger()
    np.random.seed(globals_dict["seed"])
    from sklearn.tree import DecisionTreeClassifier

    args = {"random_state": globals_dict["seed"]}
    tune_args = {}
    tune_grid = {
        "max_depth": np_list_arange(1, 16, 1, inclusive=True),
        "max_features": [1.0, "sqrt", "log2"],
        "min_samples_leaf": [2, 3, 4, 5, 6],
        "min_samples_split": [2, 5, 7, 9, 10],
        "criterion": ["gini", "entropy"],
        "min_impurity_decrease": [
            0, 0.0001, 0.001, 0.01, 0.0002, 0.002, 0.02,
            0.0005, 0.005, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5,
        ],
    }
    tune_distributions = {
        "max_depth": IntUniformDistribution(1, 16),
        "max_features": UniformDistribution(0.4, 1),
        "min_samples_leaf": IntUniformDistribution(2, 6),
        "min_samples_split": IntUniformDistribution(2, 10),
        "min_impurity_decrease": UniformDistribution(0.000000001, 0.5, log=True),
    }

    leftover_parameters_to_categorical_distributions(tune_grid, tune_distributions)

    super().__init__(
        id="dt",
        name="Decision Tree Classifier",
        class_def=DecisionTreeClassifier,
        args=args,
        tune_grid=tune_grid,
        tune_distribution=tune_distributions,
        tune_args=tune_args,
        shap="type1",
    )

def __init__(self, globals_dict: dict) -> None:
    logger = get_logger()
    np.random.seed(globals_dict["seed"])
    gpu_imported = False

    from sklearn.linear_model import RidgeClassifier

    if globals_dict["gpu_param"] == "force":
        import cuml.linear_model

        logger.info("Imported cuml.linear_model")
        gpu_imported = True
    elif globals_dict["gpu_param"]:
        try:
            import cuml.linear_model

            logger.info("Imported cuml.linear_model")
            gpu_imported = True
        except ImportError:
            logger.warning("Couldn't import cuml.linear_model")

    args = {}
    tune_args = {}
    tune_grid = {}
    tune_distributions = {}

    if gpu_imported:
        RidgeClassifier = pycaret.internal.cuml_wrappers.get_ridge_classifier()
    else:
        args = {"random_state": globals_dict["seed"]}
        tune_grid = {
            "normalize": [True, False],
        }

    tune_grid["alpha"] = np_list_arange(0.01, 10, 0.01, inclusive=False)
    tune_grid["fit_intercept"] = [True, False]
    tune_distributions["alpha"] = UniformDistribution(0.001, 10)

    leftover_parameters_to_categorical_distributions(tune_grid, tune_distributions)

    super().__init__(
        id="ridge",
        name="Ridge Classifier",
        class_def=RidgeClassifier,
        args=args,
        tune_grid=tune_grid,
        tune_distribution=tune_distributions,
        tune_args=tune_args,
        shap=False,
        is_gpu_enabled=gpu_imported,
    )

    if gpu_imported:
        self.reference = get_class_name(cuml.linear_model.Ridge)

def __init__(self, globals_dict: dict) -> None:
    logger = get_logger()
    np.random.seed(globals_dict["seed"])
    gpu_imported = False

    from sklearn.svm import SVC

    if globals_dict["gpu_param"] == "force":
        from cuml.svm import SVC

        logger.info("Imported cuml.svm.SVC")
        gpu_imported = True
    elif globals_dict["gpu_param"]:
        try:
            from cuml.svm import SVC

            logger.info("Imported cuml.svm.SVC")
            gpu_imported = True
        except ImportError:
            logger.warning("Couldn't import cuml.svm.SVC")

    args = {
        "gamma": "auto",
        "C": 1.0,
        "probability": True,
        "kernel": "rbf",
        "random_state": globals_dict["seed"],
    }
    tune_args = {}
    tune_grid = {
        "C": np_list_arange(0, 50, 0.01, inclusive=True),
        "class_weight": ["balanced", {}],
    }
    tune_distributions = {
        "C": UniformDistribution(0, 50),
    }

    leftover_parameters_to_categorical_distributions(tune_grid, tune_distributions)

    if gpu_imported:
        SVC = get_svc_classifier()

    super().__init__(
        id="rbfsvm",
        name="SVM - Radial Kernel",
        class_def=SVC,
        args=args,
        tune_grid=tune_grid,
        tune_distribution=tune_distributions,
        tune_args=tune_args,
        shap=False,
        is_turbo=False,
    )

def __init__(self, globals_dict: dict) -> None:
    logger = get_logger()
    np.random.seed(globals_dict["seed"])
    gpu_imported = False

    from sklearn.linear_model import LogisticRegression

    if globals_dict["gpu_param"] == "force":
        from cuml.linear_model import LogisticRegression

        logger.info("Imported cuml.linear_model.LogisticRegression")
        gpu_imported = True
    elif globals_dict["gpu_param"]:
        try:
            from cuml.linear_model import LogisticRegression

            logger.info("Imported cuml.linear_model.LogisticRegression")
            gpu_imported = True
        except ImportError:
            logger.warning("Couldn't import cuml.linear_model.LogisticRegression")

    args = {"max_iter": 1000}
    tune_args = {}
    tune_grid = {}
    tune_distributions = {}

    # common
    tune_grid["penalty"] = ["l2", "none"]
    tune_grid["C"] = np_list_arange(0, 10, 0.001, inclusive=True)

    if gpu_imported:
        tune_grid["penalty"] += ["l1"]
    else:
        args["random_state"] = globals_dict["seed"]
        tune_grid["class_weight"] = ["balanced", {}]

    tune_distributions["C"] = UniformDistribution(0, 10)

    leftover_parameters_to_categorical_distributions(tune_grid, tune_distributions)

    super().__init__(
        id="lr",
        name="Logistic Regression",
        class_def=LogisticRegression,
        args=args,
        tune_grid=tune_grid,
        tune_distribution=tune_distributions,
        tune_args=tune_args,
        shap=False,
    )

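# Note on the penalty grid above: "l1" is appended only in the cuML branch.
# sklearn's LogisticRegression with its default lbfgs solver supports only an
# l2 (or no) penalty, which is presumably why the CPU grid leaves "l1" out.
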
def __init__(self, globals_dict: dict) -> None:
    logger = get_logger()
    np.random.seed(globals_dict["seed"])
    from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

    args = {}
    tune_args = {}
    tune_grid = {"reg_param": np_list_arange(0, 1, 0.01, inclusive=True)}
    tune_distributions = {"reg_param": UniformDistribution(0, 1)}

    leftover_parameters_to_categorical_distributions(tune_grid, tune_distributions)

    super().__init__(
        id="qda",
        name="Quadratic Discriminant Analysis",
        class_def=QuadraticDiscriminantAnalysis,
        args=args,
        tune_grid=tune_grid,
        tune_distribution=tune_distributions,
        tune_args=tune_args,
        shap=False,
    )

def __init__(self, globals_dict: dict) -> None:
    logger = get_logger()
    np.random.seed(globals_dict["seed"])
    gpu_imported = False

    from sklearn.ensemble import RandomForestClassifier

    if globals_dict["gpu_param"] == "force":
        import cuml.ensemble

        logger.info("Imported cuml.ensemble")
        gpu_imported = True
    elif globals_dict["gpu_param"]:
        try:
            import cuml.ensemble

            logger.info("Imported cuml.ensemble")
            gpu_imported = True
        except ImportError:
            logger.warning("Couldn't import cuml.ensemble")

    if gpu_imported:
        RandomForestClassifier = (
            pycaret.internal.cuml_wrappers.get_random_forest_classifier()
        )

    args = (
        {
            "random_state": globals_dict["seed"],
            "n_jobs": globals_dict["n_jobs_param"],
        }
        if not gpu_imported
        else {"seed": globals_dict["seed"]}
    )
    tune_args = {}
    tune_grid = {
        "n_estimators": np_list_arange(10, 300, 10, inclusive=True),
        "max_depth": np_list_arange(1, 11, 1, inclusive=True),
        "min_impurity_decrease": [
            0, 0.0001, 0.001, 0.01, 0.0002, 0.002, 0.02,
            0.0005, 0.005, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5,
        ],
        "max_features": [1.0, "sqrt", "log2"],
        "bootstrap": [True, False],
    }
    tune_distributions = {
        "n_estimators": IntUniformDistribution(10, 300),
        "max_depth": IntUniformDistribution(1, 11),
        "min_impurity_decrease": UniformDistribution(0.000000001, 0.5, log=True),
        "max_features": UniformDistribution(0.4, 1),
    }

    if gpu_imported:
        tune_grid["split_criterion"] = [0, 1]
    else:
        tune_grid["criterion"] = ["gini", "entropy"]
        tune_grid["class_weight"] = ["balanced", "balanced_subsample", {}]
        tune_grid["min_samples_split"] = [2, 5, 7, 9, 10]
        tune_grid["min_samples_leaf"] = [2, 3, 4, 5, 6]
        tune_distributions["min_samples_split"] = IntUniformDistribution(2, 10)
        tune_distributions["min_samples_leaf"] = IntUniformDistribution(2, 6)

    leftover_parameters_to_categorical_distributions(tune_grid, tune_distributions)

    super().__init__(
        id="rf",
        name="Random Forest Classifier",
        class_def=RandomForestClassifier,
        args=args,
        tune_grid=tune_grid,
        tune_distribution=tune_distributions,
        tune_args=tune_args,
        shap="type1",
        is_gpu_enabled=gpu_imported,
    )

    if gpu_imported:
        self.reference = get_class_name(cuml.ensemble.RandomForestClassifier)

def __init__(self, globals_dict: dict) -> None:
    logger = get_logger()
    np.random.seed(globals_dict["seed"])
    gpu_imported = False

    from sklearn.linear_model import SGDClassifier

    if globals_dict["gpu_param"] == "force":
        from cuml import MBSGDClassifier as SGDClassifier

        logger.info("Imported cuml.MBSGDClassifier")
        gpu_imported = True
    elif globals_dict["gpu_param"]:
        try:
            from cuml import MBSGDClassifier as SGDClassifier

            logger.info("Imported cuml.MBSGDClassifier")
            gpu_imported = True
        except ImportError:
            logger.warning("Couldn't import cuml.MBSGDClassifier")

    args = {"tol": 0.001, "loss": "hinge", "penalty": "l2", "eta0": 0.001}
    tune_args = {}
    tune_grid = {
        "penalty": ["elasticnet", "l2", "l1"],
        "l1_ratio": np_list_arange(0.0000000001, 1, 0.01, inclusive=False),
        "alpha": [
            0.0000001, 0.000001, 0.0001, 0.001, 0.01, 0.0002, 0.002, 0.02,
            0.0005, 0.005, 0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5,
        ],
        "fit_intercept": [True, False],
        "learning_rate": ["constant", "invscaling", "adaptive", "optimal"],
        "eta0": [0.001, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5],
    }
    tune_distributions = {
        "l1_ratio": UniformDistribution(0.0000000001, 0.9999999999),
        "alpha": UniformDistribution(0.0000000001, 0.9999999999, log=True),
        "eta0": UniformDistribution(0.001, 0.5, log=True),
    }

    if gpu_imported:
        tune_grid["learning_rate"].remove("optimal")
        batch_size = [
            (512, 50000),
            (256, 25000),
            (128, 10000),
            (64, 5000),
            (32, 1000),
            (16, 0),
        ]
        for arg, x_len in batch_size:
            if len(globals_dict["X_train"]) >= x_len:
                args["batch_size"] = arg
                break
    else:
        args["random_state"] = globals_dict["seed"]
        args["n_jobs"] = globals_dict["n_jobs_param"]

    leftover_parameters_to_categorical_distributions(tune_grid, tune_distributions)

    super().__init__(
        id="svm",
        name="SVM - Linear Kernel",
        class_def=SGDClassifier,
        args=args,
        tune_grid=tune_grid,
        tune_distribution=tune_distributions,
        tune_args=tune_args,
        shap=False,
    )

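# The batch-size lookup above picks the first (largest) batch whose row
# threshold the training set meets. A standalone sketch with hypothetical
# dataset sizes (pick_batch_size is an illustrative name, not part of the
# library):
def pick_batch_size(n_rows: int) -> int:
    for batch, threshold in [(512, 50000), (256, 25000), (128, 10000),
                             (64, 5000), (32, 1000), (16, 0)]:
        if n_rows >= threshold:
            return batch


assert pick_batch_size(30000) == 256  # 30,000 >= 25,000 but < 50,000
assert pick_batch_size(500) == 16     # falls through to the (16, 0) catch-all
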
def __init__(self, globals_dict: dict) -> None:
    logger = get_logger()
    np.random.seed(globals_dict["seed"])
    from lightgbm import LGBMClassifier
    from lightgbm.basic import LightGBMError

    args = {
        "random_state": globals_dict["seed"],
        "n_jobs": globals_dict["n_jobs_param"],
    }
    tune_args = {}
    tune_grid = {
        "num_leaves": [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 150, 200],
        "learning_rate": np_list_arange(0.001, 0.5, 0.001, inclusive=True),
        "n_estimators": np_list_arange(10, 300, 10, inclusive=True),
        "min_split_gain": [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
        "reg_alpha": [
            0.0000001, 0.000001, 0.0001, 0.001, 0.01, 0.0005, 0.005, 0.05,
            0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.7, 1, 2, 3, 4, 5, 10,
        ],
        "reg_lambda": [
            0.0000001, 0.000001, 0.0001, 0.001, 0.01, 0.0005, 0.005, 0.05,
            0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.7, 1, 2, 3, 4, 5, 10,
        ],
        "feature_fraction": np_list_arange(0.4, 1, 0.1, inclusive=True),
        "bagging_fraction": np_list_arange(0.4, 1, 0.1, inclusive=True),
        "bagging_freq": [1, 2, 3, 4, 5, 6, 7],
        "min_child_samples": np_list_arange(5, 100, 5, inclusive=True),
    }
    tune_distributions = {
        "num_leaves": IntUniformDistribution(10, 200),
        "learning_rate": UniformDistribution(0.000001, 0.5, log=True),
        "n_estimators": IntUniformDistribution(10, 300),
        "min_split_gain": UniformDistribution(0, 1),
        "reg_alpha": UniformDistribution(0.0000000001, 10, log=True),
        "reg_lambda": UniformDistribution(0.0000000001, 10, log=True),
        "min_data_in_leaf": IntUniformDistribution(10, 10000),
        "feature_fraction": UniformDistribution(0.4, 1),
        "bagging_fraction": UniformDistribution(0.4, 1),
        "bagging_freq": IntUniformDistribution(1, 7),
        "min_child_samples": IntUniformDistribution(5, 100),
    }

    leftover_parameters_to_categorical_distributions(tune_grid, tune_distributions)

    is_gpu_enabled = False
    if globals_dict["gpu_param"]:
        try:
            lgb = LGBMClassifier(device="gpu")
            lgb.fit(np.zeros((2, 2)), [0, 1])
            is_gpu_enabled = True
            del lgb
        except LightGBMError:
            is_gpu_enabled = False
            if globals_dict["gpu_param"] == "force":
                raise RuntimeError(
                    "LightGBM GPU mode not available. Consult "
                    "https://lightgbm.readthedocs.io/en/latest/GPU-Tutorial.html."
                )

    if is_gpu_enabled:
        args["device"] = "gpu"

    super().__init__(
        id="lightgbm",
        name="Light Gradient Boosting Machine",
        class_def=LGBMClassifier,
        args=args,
        tune_grid=tune_grid,
        tune_distribution=tune_distributions,
        tune_args=tune_args,
        shap="type1",
        is_gpu_enabled=is_gpu_enabled,
    )

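# The GPU probe above (fit a tiny dummy dataset with device="gpu" and treat
# LightGBMError as "no GPU build") can be reused standalone. A minimal
# sketch; the function name is illustrative, and any exception other than
# LightGBMError deliberately propagates, mirroring the code above:
def lightgbm_gpu_available() -> bool:
    import numpy as np
    from lightgbm import LGBMClassifier
    from lightgbm.basic import LightGBMError

    try:
        LGBMClassifier(device="gpu").fit(np.zeros((2, 2)), [0, 1])
        return True
    except LightGBMError:
        return False
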
def __init__(self, globals_dict: dict) -> None:
    logger = get_logger()
    np.random.seed(globals_dict["seed"])
    from xgboost import XGBClassifier

    args = {
        "random_state": globals_dict["seed"],
        "n_jobs": globals_dict["n_jobs_param"],
        "verbosity": 0,
        "booster": "gbtree",
        "tree_method": "gpu_hist" if globals_dict["gpu_param"] else "auto",
    }
    tune_args = {}
    tune_grid = {
        "learning_rate": np_list_arange(0.001, 0.5, 0.001, inclusive=True),
        "n_estimators": np_list_arange(10, 300, 10, inclusive=True),
        "subsample": [0.2, 0.3, 0.5, 0.7, 0.9, 1],
        "max_depth": np_list_arange(1, 11, 1, inclusive=True),
        "colsample_bytree": [0.5, 0.7, 0.9, 1],
        "min_child_weight": [1, 2, 3, 4],
        "reg_alpha": [
            0.0000001, 0.000001, 0.0001, 0.001, 0.01, 0.0005, 0.005, 0.05,
            0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.7, 1, 2, 3, 4, 5, 10,
        ],
        "reg_lambda": [
            0.0000001, 0.000001, 0.0001, 0.001, 0.01, 0.0005, 0.005, 0.05,
            0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.7, 1, 2, 3, 4, 5, 10,
        ],
        "scale_pos_weight": np_list_arange(0, 50, 0.1, inclusive=True),
    }
    tune_distributions = {
        "learning_rate": UniformDistribution(0.000001, 0.5, log=True),
        "n_estimators": IntUniformDistribution(10, 300),
        "subsample": UniformDistribution(0.2, 1),
        "max_depth": IntUniformDistribution(1, 11),
        "colsample_bytree": UniformDistribution(0.5, 1),
        "min_child_weight": IntUniformDistribution(1, 4),
        "reg_alpha": UniformDistribution(0.0000000001, 10, log=True),
        "reg_lambda": UniformDistribution(0.0000000001, 10, log=True),
        "scale_pos_weight": UniformDistribution(1, 50),
    }

    leftover_parameters_to_categorical_distributions(tune_grid, tune_distributions)

    super().__init__(
        id="xgboost",
        name="Extreme Gradient Boosting",
        class_def=XGBClassifier,
        args=args,
        tune_grid=tune_grid,
        tune_distribution=tune_distributions,
        tune_args=tune_args,
        shap="type2",
        is_gpu_enabled=bool(globals_dict["gpu_param"]),
    )

def __init__(self, globals_dict: dict) -> None:
    logger = get_logger()
    np.random.seed(globals_dict["seed"])
    try:
        import catboost
    except ImportError:
        logger.warning("Couldn't import catboost.CatBoostClassifier")
        self.active = False
        return

    catboost_version = tuple(int(x) for x in catboost.__version__.split("."))
    if catboost_version < (0, 23, 2):
        logger.warning(
            f"Wrong catboost version. Expected catboost>=0.23.2, "
            f"got catboost=={catboost.__version__}"
        )
        self.active = False
        return

    from catboost import CatBoostClassifier

    # suppress output
    logging.getLogger("catboost").setLevel(logging.ERROR)

    use_gpu = globals_dict["gpu_param"] == "force" or (
        globals_dict["gpu_param"] and len(globals_dict["X_train"]) >= 50000
    )

    args = {
        "random_state": globals_dict["seed"],
        "verbose": False,
        "thread_count": globals_dict["n_jobs_param"],
        "task_type": "GPU" if use_gpu else "CPU",
        "border_count": 32 if use_gpu else 254,
    }
    tune_args = {}
    tune_grid = {
        "depth": list(range(1, 12)),
        "n_estimators": np_list_arange(10, 300, 10, inclusive=True),
        "random_strength": np_list_arange(0, 0.8, 0.1, inclusive=True),
        "l2_leaf_reg": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 50, 100, 200],
    }
    tune_distributions = {
        "depth": IntUniformDistribution(1, 11),
        "n_estimators": IntUniformDistribution(10, 300),
        "random_strength": UniformDistribution(0, 0.8),
        "l2_leaf_reg": IntUniformDistribution(1, 200, log=True),
    }

    if use_gpu:
        tune_grid["depth"] = list(range(1, 9))
        tune_distributions["depth"] = IntUniformDistribution(1, 8)

    leftover_parameters_to_categorical_distributions(tune_grid, tune_distributions)

    super().__init__(
        id="catboost",
        name="CatBoost Classifier",
        class_def=CatBoostClassifier,
        args=args,
        tune_grid=tune_grid,
        tune_distribution=tune_distributions,
        tune_args=tune_args,
        shap="type2",
        is_gpu_enabled=use_gpu,
    )

def __init__(self, globals_dict: dict) -> None:
    logger = get_logger()
    np.random.seed(globals_dict["seed"])
    try:
        import xgboost
    except ImportError:
        logger.warning("Couldn't import xgboost.XGBClassifier")
        self.active = False
        return

    xgboost_version = tuple(int(x) for x in xgboost.__version__.split("."))
    if xgboost_version < (1, 1, 0):
        logger.warning(
            f"Wrong xgboost version. Expected xgboost>=1.1.0, "
            f"got xgboost=={xgboost.__version__}"
        )
        self.active = False
        return

    from xgboost import XGBClassifier

    args = {
        "random_state": globals_dict["seed"],
        "n_jobs": globals_dict["n_jobs_param"],
        "verbosity": 0,
        "booster": "gbtree",
        "tree_method": "gpu_hist" if globals_dict["gpu_param"] else "auto",
    }
    tune_args = {}
    tune_grid = {
        "learning_rate": np_list_arange(0.001, 0.5, 0.001, inclusive=True),
        "n_estimators": np_list_arange(10, 300, 10, inclusive=True),
        "subsample": [0.2, 0.3, 0.5, 0.7, 0.9, 1],
        "max_depth": np_list_arange(1, 11, 1, inclusive=True),
        "colsample_bytree": [0.5, 0.7, 0.9, 1],
        "min_child_weight": [1, 2, 3, 4],
        "reg_alpha": [
            0.0000001, 0.000001, 0.0001, 0.001, 0.01, 0.0005, 0.005, 0.05,
            0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.7, 1, 2, 3, 4, 5, 10,
        ],
        "reg_lambda": [
            0.0000001, 0.000001, 0.0001, 0.001, 0.01, 0.0005, 0.005, 0.05,
            0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.7, 1, 2, 3, 4, 5, 10,
        ],
        "scale_pos_weight": np_list_arange(0, 50, 0.1, inclusive=True),
    }
    tune_distributions = {
        "learning_rate": UniformDistribution(0.000001, 0.5, log=True),
        "n_estimators": IntUniformDistribution(10, 300),
        "subsample": UniformDistribution(0.2, 1),
        "max_depth": IntUniformDistribution(1, 11),
        "colsample_bytree": UniformDistribution(0.5, 1),
        "min_child_weight": IntUniformDistribution(1, 4),
        "reg_alpha": UniformDistribution(0.0000000001, 10, log=True),
        "reg_lambda": UniformDistribution(0.0000000001, 10, log=True),
        "scale_pos_weight": UniformDistribution(1, 50),
    }

    leftover_parameters_to_categorical_distributions(tune_grid, tune_distributions)

    super().__init__(
        id="xgboost",
        name="Extreme Gradient Boosting",
        class_def=XGBClassifier,
        args=args,
        tune_grid=tune_grid,
        tune_distribution=tune_distributions,
        tune_args=tune_args,
        shap="type2",
        is_gpu_enabled=bool(globals_dict["gpu_param"]),
    )

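# Hedged usage sketch: each __init__ above belongs to a model-container class
# whose class statement sits outside this excerpt. Assuming pycaret's usual
# container names (e.g. XGBClassifierContainer) and the globals_dict keys
# read above ("seed", "gpu_param", "n_jobs_param", "X_train"), construction
# would look roughly like:
#
#     globals_dict = {
#         "seed": 42,
#         "gpu_param": False,   # False, True, or "force"
#         "n_jobs_param": -1,
#         "X_train": X_train,   # consulted by the GPU heuristics above
#     }
#     container = XGBClassifierContainer(globals_dict)
#     model = container.class_def(**container.args)
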