Example 1
    def __init__(self, globals_dict: dict) -> None:
        logger = get_logger()
        np.random.seed(globals_dict["seed"])
        from sklearn.ensemble import AdaBoostClassifier

        args = {"random_state": globals_dict["seed"]}
        tune_args = {}
        tune_grid = {
            "n_estimators": np_list_arange(10, 300, 10, inclusive=True),
            "learning_rate": np_list_arange(0.001, 0.5, 0.001, inclusive=True),
            "algorithm": ["SAMME", "SAMME.R"],
        }
        tune_distributions = {
            "n_estimators": IntUniformDistribution(10, 300),
            "learning_rate": UniformDistribution(0.000001, 0.5, log=True),
        }

        leftover_parameters_to_categorical_distributions(
            tune_grid, tune_distributions)

        super().__init__(
            id="ada",
            name="Ada Boost Classifier",
            class_def=AdaBoostClassifier,
            args=args,
            tune_grid=tune_grid,
            tune_distribution=tune_distributions,
            tune_args=tune_args,
            shap=False,
        )
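Every grid in these examples leans on the np_list_arange helper, whose definition is not included in the snippets. A minimal sketch of its presumed behavior, assuming it is a list-returning np.arange with an optional inclusive endpoint (the rounding to tame float drift is likewise an assumption):

import numpy as np

def np_list_arange(start, stop, step, inclusive=False):
    # Presumed behavior: np.arange as a plain Python list, with an
    # optional inclusive endpoint; rounding (an assumption) tames
    # float drift such as 0.30000000000000004 -> 0.3.
    end = stop + step if inclusive else stop
    return [round(float(x), 10) for x in np.arange(start, end, step)]

Under that reading, np_list_arange(10, 300, 10, inclusive=True) yields [10.0, 20.0, ..., 300.0].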
Example 2
    def __init__(self, globals_dict: dict) -> None:
        logger = get_logger()
        np.random.seed(globals_dict["seed"])
        from sklearn.ensemble import BaggingClassifier

        args = {
            "random_state": globals_dict["seed"],
            "n_jobs": 1 if globals_dict["gpu_param"] else None,
        }
        tune_args = {}
        tune_grid = {
            "bootstrap": [True, False],
            "bootstrap_features": [True, False],
            "max_features": np_list_arange(0.4, 1, 0.1, inclusive=True),
            "max_samples": np_list_arange(0.4, 1, 0.1, inclusive=True),
        }
        tune_distributions = {
            "max_features": UniformDistribution(0.4, 1),
            "max_samples": UniformDistribution(0.4, 1),
        }

        leftover_parameters_to_categorical_distributions(tune_grid, tune_distributions)

        super().__init__(
            id="Bagging",
            name="Bagging Classifier",
            class_def=BaggingClassifier,
            args=args,
            tune_grid=tune_grid,
            tune_distribution=tune_distributions,
            tune_args=tune_args,
            shap=False,
            is_special=True,
            is_gpu_enabled=False,
        )
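Likewise, every example finishes its search-space setup with leftover_parameters_to_categorical_distributions, which is also not defined in these snippets. Judging by how it is called (in place, return value ignored), a plausible sketch is to wrap each grid key that lacks an explicit distribution in a categorical one; the CategoricalDistribution shape below is an assumption:

from dataclasses import dataclass
from typing import Any, List

@dataclass
class CategoricalDistribution:  # assumed shape; mirrors the other descriptors
    values: List[Any]

def leftover_parameters_to_categorical_distributions(
    tune_grid: dict, tune_distributions: dict
) -> None:
    # Any grid parameter without an explicit distribution is treated
    # as a categorical choice over its grid values (in-place update).
    for param, values in tune_grid.items():
        if param not in tune_distributions:
            tune_distributions[param] = CategoricalDistribution(values)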
Example 3
    def __init__(self, globals_dict: dict) -> None:
        logger = get_logger()
        np.random.seed(globals_dict["seed"])
        from sklearn.ensemble import ExtraTreesClassifier

        args = {
            "random_state": globals_dict["seed"],
            "n_jobs": globals_dict["n_jobs_param"],
        }
        tune_args = {}
        tune_grid = {
            "n_estimators": np_list_arange(10, 300, 10, inclusive=True),
            "criterion": ["gini", "entropy"],
            "max_depth": np_list_arange(1, 11, 1, inclusive=True),
            "min_impurity_decrease": [
                0,
                0.0001,
                0.001,
                0.01,
                0.0002,
                0.002,
                0.02,
                0.0005,
                0.005,
                0.05,
                0.1,
                0.2,
                0.3,
                0.4,
                0.5,
            ],
            "max_features": [1.0, "sqrt", "log2"],
            "bootstrap": [True, False],
            "min_samples_split": [2, 5, 7, 9, 10],
            "min_samples_leaf": [2, 3, 4, 5, 6],
            "class_weight": ["balanced", "balanced_subsample", {}],
        }
        tune_distributions = {
            "n_estimators": IntUniformDistribution(10, 300),
            "max_depth": IntUniformDistribution(1, 11),
            "min_samples_split": IntUniformDistribution(2, 10),
            "min_samples_leaf": IntUniformDistribution(1, 5),
            "max_features": UniformDistribution(0.4, 1),
            "min_impurity_decrease": UniformDistribution(0.000000001, 0.5, log=True),
        }

        leftover_parameters_to_categorical_distributions(tune_grid, tune_distributions)

        super().__init__(
            id="et",
            name="Extra Trees Classifier",
            class_def=ExtraTreesClassifier,
            args=args,
            tune_grid=tune_grid,
            tune_distribution=tune_distributions,
            tune_args=tune_args,
            shap="type1",
        )
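The UniformDistribution and IntUniformDistribution objects used throughout read as plain search-space descriptors. A minimal sketch with field names inferred from the call sites (the real classes presumably translate into the tuning backend's distributions):

from dataclasses import dataclass

@dataclass
class UniformDistribution:
    # Continuous search range; log=True means sampling on a log scale.
    low: float
    high: float
    log: bool = False

@dataclass
class IntUniformDistribution:
    # Integer search range, inclusive on both ends.
    low: int
    high: int
    log: bool = False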
Example 4
    def __init__(self, globals_dict: dict) -> None:
        logger = get_logger()
        np.random.seed(globals_dict["seed"])
        from sklearn.ensemble import GradientBoostingClassifier

        args = {"random_state": globals_dict["seed"]}
        tune_args = {}
        tune_grid = {
            "n_estimators": np_list_arange(10, 300, 10, inclusive=True),
            "learning_rate": np_list_arange(0.001, 0.5, 0.001, inclusive=True),
            "subsample": np_list_arange(0.2, 1, 0.05, inclusive=True),
            "min_samples_split": [2, 4, 5, 7, 9, 10],
            "min_samples_leaf": [1, 2, 3, 4, 5],
            "max_depth": np_list_arange(1, 11, 1, inclusive=True),
            "min_impurity_decrease": [
                0,
                0.0001,
                0.001,
                0.01,
                0.0002,
                0.002,
                0.02,
                0.0005,
                0.005,
                0.05,
                0.1,
                0.2,
                0.3,
                0.4,
                0.5,
            ],
            "max_features": [1.0, "sqrt", "log2"],
        }
        tune_distributions = {
            "n_estimators": IntUniformDistribution(10, 300),
            "learning_rate": UniformDistribution(0.000001, 0.5, log=True),
            "subsample": UniformDistribution(0.2, 1),
            "min_samples_split": IntUniformDistribution(2, 10),
            "min_samples_leaf": IntUniformDistribution(1, 5),
            "max_depth": IntUniformDistribution(1, 11),
            "min_impurity_decrease": UniformDistribution(0.000000001, 0.5, log=True),
            "max_features": UniformDistribution(0.4, 1),
        }

        leftover_parameters_to_categorical_distributions(tune_grid, tune_distributions)

        super().__init__(
            id="gbc",
            name="Gradient Boosting Classifier",
            class_def=GradientBoostingClassifier,
            args=args,
            tune_grid=tune_grid,
            tune_distribution=tune_distributions,
            tune_args=tune_args,
            shap=False,
        )
Example 5
    def __init__(self, globals_dict: dict) -> None:
        logger = get_logger()
        np.random.seed(globals_dict["seed"])
        from catboost import CatBoostClassifier

        # suppress output
        logging.getLogger("catboost").setLevel(logging.ERROR)

        use_gpu = globals_dict["gpu_param"] == "force" or (
            globals_dict["gpu_param"]
            and len(globals_dict["X_train"]) >= 50000)

        args = {
            "random_state": globals_dict["seed"],
            "verbose": False,
            "thread_count": globals_dict["n_jobs_param"],
            "task_type": "GPU" if use_gpu else "CPU",
            "border_count": 32 if use_gpu else 254,
        }
        tune_args = {}
        tune_grid = {
            "depth": list(range(1, 12)),
            "n_estimators": np_list_arange(10, 300, 10, inclusive=True),
            "random_strength": np_list_arange(0, 0.8, 0.1, inclusive=True),
            "l2_leaf_reg":
            [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 50, 100, 200],
        }
        tune_distributions = {
            "depth": IntUniformDistribution(1, 11),
            "n_estimators": IntUniformDistribution(10, 300),
            "random_strength": UniformDistribution(0, 0.8),
            "l2_leaf_reg": IntUniformDistribution(1, 200, log=True),
        }

        if use_gpu:
            tune_grid["depth"] = list(range(1, 9))
            tune_distributions["depth"] = (IntUniformDistribution(1, 8), )

        leftover_parameters_to_categorical_distributions(
            tune_grid, tune_distributions)

        super().__init__(
            id="catboost",
            name="CatBoost Classifier",
            class_def=CatBoostClassifier,
            args=args,
            tune_grid=tune_grid,
            tune_distribution=tune_distributions,
            tune_args=tune_args,
            shap="type2",
            is_gpu_enabled=use_gpu,
        )
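The use_gpu condition in this example (and again in Example 15) encodes a heuristic: forced GPU always wins, otherwise GPU training is only worth it for at least 50,000 training rows. Isolated as a hypothetical helper for readability:

def should_use_gpu(gpu_param, n_rows, threshold=50_000):
    # "force" always enables GPU training; a merely truthy gpu_param
    # enables it only when the data set is large enough to pay off.
    return gpu_param == "force" or (bool(gpu_param) and n_rows >= threshold)

# Equivalent to the in-line condition above:
# use_gpu = should_use_gpu(globals_dict["gpu_param"], len(globals_dict["X_train"]))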
Example 6
    def __init__(self, globals_dict: dict) -> None:
        logger = get_logger()
        np.random.seed(globals_dict["seed"])
        from sklearn.tree import DecisionTreeClassifier

        args = {"random_state": globals_dict["seed"]}
        tune_args = {}
        tune_grid = {
            "max_depth":
            np_list_arange(1, 16, 1, inclusive=True),
            "max_features": [1.0, "sqrt", "log2"],
            "min_samples_leaf": [2, 3, 4, 5, 6],
            "min_samples_split": [2, 5, 7, 9, 10],
            "criterion": ["gini", "entropy"],
            "min_impurity_decrease": [
                0,
                0.0001,
                0.001,
                0.01,
                0.0002,
                0.002,
                0.02,
                0.0005,
                0.005,
                0.05,
                0.1,
                0.2,
                0.3,
                0.4,
                0.5,
            ],
        }
        tune_distributions = {
            "max_depth": IntUniformDistribution(1, 16),
            "max_features": UniformDistribution(0.4, 1),
            "min_samples_leaf": IntUniformDistribution(2, 6),
            "min_samples_split": IntUniformDistribution(2, 10),
            "min_impurity_decrease": UniformDistribution(0.000000001, 0.5, log=True),
        }

        leftover_parameters_to_categorical_distributions(
            tune_grid, tune_distributions)

        super().__init__(
            id="dt",
            name="Decision Tree Classifier",
            class_def=DecisionTreeClassifier,
            args=args,
            tune_grid=tune_grid,
            tune_distribution=tune_distributions,
            tune_args=tune_args,
            shap="type1",
        )
Example 7
    def __init__(self, globals_dict: dict) -> None:
        logger = get_logger()
        np.random.seed(globals_dict["seed"])
        gpu_imported = False

        from sklearn.linear_model import RidgeClassifier

        if globals_dict["gpu_param"] == "force":
            import cuml.linear_model

            logger.info("Imported cuml.linear_model")
            gpu_imported = True
        elif globals_dict["gpu_param"]:
            try:
                import cuml.linear_model

                logger.info("Imported cuml.linear_model")
                gpu_imported = True
            except ImportError:
                logger.warning("Couldn't import cuml.linear_model")

        args = {}
        tune_args = {}
        tune_grid = {}
        tune_distributions = {}

        if gpu_imported:
            RidgeClassifier = pycaret.internal.cuml_wrappers.get_ridge_classifier()
        else:
            args = {"random_state": globals_dict["seed"]}

        tune_grid = {
            "normalize": [True, False],
        }

        tune_grid["alpha"] = np_list_arange(0.01, 10, 0.01, inclusive=False)
        tune_grid["fit_intercept"] = [True, False]
        tune_distributions["alpha"] = UniformDistribution(0.001, 10)

        leftover_parameters_to_categorical_distributions(
            tune_grid, tune_distributions)

        super().__init__(
            id="ridge",
            name="Ridge Classifier",
            class_def=RidgeClassifier,
            args=args,
            tune_grid=tune_grid,
            tune_distribution=tune_distributions,
            tune_args=tune_args,
            shap=False,
            is_gpu_enabled=gpu_imported,
        )
        if gpu_imported:
            self.reference = get_class_name(cuml.linear_model.Ridge)
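Examples 7, 8, 9, 11, and 12 repeat the same three-way import dance: require cuML outright under gpu_param == "force", soft-try it for any other truthy value, and fall back to the scikit-learn class. Factored out as a sketch (try_import_cuml is hypothetical, not part of the source):

import importlib

def try_import_cuml(module_name, logger, force=False):
    # Returns the imported module, or None when cuML is unavailable
    # and the import was not forced.
    try:
        module = importlib.import_module(module_name)
        logger.info(f"Imported {module_name}")
        return module
    except ImportError:
        if force:
            raise  # gpu_param == "force" treats a missing GPU stack as fatal
        logger.warning(f"Couldn't import {module_name}")
        return None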
Example 8
    def __init__(self, globals_dict: dict) -> None:
        logger = get_logger()
        np.random.seed(globals_dict["seed"])
        gpu_imported = False

        from sklearn.svm import SVC

        if globals_dict["gpu_param"] == "force":
            from cuml.svm import SVC

            logger.info("Imported cuml.svm.SVC")
            gpu_imported = True
        elif globals_dict["gpu_param"]:
            try:
                from cuml.svm import SVC

                logger.info("Imported cuml.svm.SVC")
                gpu_imported = True
            except ImportError:
                logger.warning("Couldn't import cuml.svm.SVC")

        args = {
            "gamma": "auto",
            "C": 1.0,
            "probability": True,
            "kernel": "rbf",
            "random_state": globals_dict["seed"],
        }
        tune_args = {}
        tune_grid = {
            "C": np_list_arange(0, 50, 0.01, inclusive=True),
            "class_weight": ["balanced", {}],
        }
        tune_distributions = {
            "C": UniformDistribution(0, 50),
        }

        leftover_parameters_to_categorical_distributions(
            tune_grid, tune_distributions)

        if gpu_imported:
            SVC = get_svc_classifier()

        super().__init__(
            id="rbfsvm",
            name="SVM - Radial Kernel",
            class_def=SVC,
            args=args,
            tune_grid=tune_grid,
            tune_distribution=tune_distributions,
            tune_args=tune_args,
            shap=False,
            is_turbo=False,
        )
Example 9
    def __init__(self, globals_dict: dict) -> None:
        logger = get_logger()
        np.random.seed(globals_dict["seed"])
        gpu_imported = False

        from sklearn.linear_model import LogisticRegression

        if globals_dict["gpu_param"] == "force":
            from cuml.linear_model import LogisticRegression

            logger.info("Imported cuml.linear_model.LogisticRegression")
            gpu_imported = True
        elif globals_dict["gpu_param"]:
            try:
                from cuml.linear_model import LogisticRegression

                logger.info("Imported cuml.linear_model.LogisticRegression")
                gpu_imported = True
            except ImportError:
                logger.warning(
                    "Couldn't import cuml.linear_model.LogisticRegression")

        args = {"max_iter": 1000}
        tune_args = {}
        tune_grid = {}
        tune_distributions = {}

        # common
        tune_grid["penalty"] = ["l2", "none"]
        tune_grid["C"] = np_list_arange(0, 10, 0.001, inclusive=True)

        if gpu_imported:
            tune_grid["penalty"] += ["l1"]
        else:
            args["random_state"] = globals_dict["seed"]

            tune_grid["class_weight"] = ["balanced", {}]

        tune_distributions["C"] = UniformDistribution(0, 10)
        leftover_parameters_to_categorical_distributions(
            tune_grid, tune_distributions)

        super().__init__(
            id="lr",
            name="Logistic Regression",
            class_def=LogisticRegression,
            args=args,
            tune_grid=tune_grid,
            tune_distribution=tune_distributions,
            tune_args=tune_args,
            shap=False,
        )
Example 10
    def __init__(self, globals_dict: dict) -> None:
        logger = get_logger()
        np.random.seed(globals_dict["seed"])
        from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

        args = {}
        tune_args = {}
        tune_grid = {"reg_param": np_list_arange(0, 1, 0.01, inclusive=True)}
        tune_distributions = {"reg_param": UniformDistribution(0, 1)}

        leftover_parameters_to_categorical_distributions(tune_grid, tune_distributions)

        super().__init__(
            id="qda",
            name="Quadratic Discriminant Analysis",
            class_def=QuadraticDiscriminantAnalysis,
            args=args,
            tune_grid=tune_grid,
            tune_distribution=tune_distributions,
            tune_args=tune_args,
            shap=False,
        )
Example 11
    def __init__(self, globals_dict: dict) -> None:
        logger = get_logger()
        np.random.seed(globals_dict["seed"])
        gpu_imported = False

        from sklearn.ensemble import RandomForestClassifier

        if globals_dict["gpu_param"] == "force":
            import cuml.ensemble

            logger.info("Imported cuml.ensemble")
            gpu_imported = True
        elif globals_dict["gpu_param"]:
            try:
                import cuml.ensemble

                logger.info("Imported cuml.ensemble")
                gpu_imported = True
            except ImportError:
                logger.warning("Couldn't import cuml.ensemble")

        if gpu_imported:
            RandomForestClassifier = (
                pycaret.internal.cuml_wrappers.get_random_forest_classifier()
            )
            args = {"seed": globals_dict["seed"]}
        else:
            args = {
                "random_state": globals_dict["seed"],
                "n_jobs": globals_dict["n_jobs_param"],
            }
        tune_args = {}
        tune_grid = {
            "n_estimators":
            np_list_arange(10, 300, 10, inclusive=True),
            "max_depth":
            np_list_arange(1, 11, 1, inclusive=True),
            "min_impurity_decrease": [
                0,
                0.0001,
                0.001,
                0.01,
                0.0002,
                0.002,
                0.02,
                0.0005,
                0.005,
                0.05,
                0.1,
                0.2,
                0.3,
                0.4,
                0.5,
            ],
            "max_features": [1.0, "sqrt", "log2"],
            "bootstrap": [True, False],
        }
        tune_distributions = {
            "n_estimators": IntUniformDistribution(10, 300),
            "max_depth": IntUniformDistribution(1, 11),
            "min_impurity_decrease": UniformDistribution(0.000000001, 0.5, log=True),
            "max_features": UniformDistribution(0.4, 1),
        }

        if gpu_imported:
            tune_grid["split_criterion"] = [0, 1]
        else:
            tune_grid["criterion"] = ["gini", "entropy"]
            tune_grid["class_weight"] = ["balanced", "balanced_subsample", {}]
            tune_grid["min_samples_split"] = [2, 5, 7, 9, 10]
            tune_grid["min_samples_leaf"] = [2, 3, 4, 5, 6]
            tune_distributions["min_samples_split"] = IntUniformDistribution(
                2, 10)
            tune_distributions["min_samples_leaf"] = IntUniformDistribution(
                2, 6)

        leftover_parameters_to_categorical_distributions(
            tune_grid, tune_distributions)

        super().__init__(
            id="rf",
            name="Random Forest Classifier",
            class_def=RandomForestClassifier,
            args=args,
            tune_grid=tune_grid,
            tune_distribution=tune_distributions,
            tune_args=tune_args,
            shap="type1",
            is_gpu_enabled=gpu_imported,
        )
        if gpu_imported:
            self.reference = get_class_name(
                cuml.ensemble.RandomForestClassifier)
Example 12
    def __init__(self, globals_dict: dict) -> None:
        logger = get_logger()
        np.random.seed(globals_dict["seed"])
        gpu_imported = False

        from sklearn.linear_model import SGDClassifier

        if globals_dict["gpu_param"] == "force":
            from cuml import MBSGDClassifier as SGDClassifier

            logger.info("Imported cuml.MBSGDClassifier")
            gpu_imported = True
        elif globals_dict["gpu_param"]:
            try:
                from cuml import MBSGDClassifier as SGDClassifier

                logger.info("Imported cuml.MBSGDClassifier")
                gpu_imported = True
            except ImportError:
                logger.warning("Couldn't import cuml.MBSGDClassifier")

        args = {"tol": 0.001, "loss": "hinge", "penalty": "l2", "eta0": 0.001}
        tune_args = {}
        tune_grid = {
            "penalty": ["elasticnet", "l2", "l1"],
            "l1_ratio":
            np_list_arange(0.0000000001, 1, 0.01, inclusive=False),
            "alpha": [
                0.0000001,
                0.000001,
                0.0001,
                0.001,
                0.01,
                0.0002,
                0.002,
                0.02,
                0.0005,
                0.005,
                0.05,
                0.1,
                0.15,
                0.2,
                0.3,
                0.4,
                0.5,
            ],
            "fit_intercept": [True, False],
            "learning_rate": ["constant", "invscaling", "adaptive", "optimal"],
            "eta0": [0.001, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5],
        }
        tune_distributions = {
            "l1_ratio": UniformDistribution(0.0000000001, 0.9999999999),
            "alpha": UniformDistribution(0.0000000001, 0.9999999999, log=True),
            "eta0": UniformDistribution(0.001, 0.5, log=True),
        }

        if gpu_imported:
            tune_grid["learning_rate"].remove("optimal")
            batch_size = [
                (512, 50000),
                (256, 25000),
                (128, 10000),
                (64, 5000),
                (32, 1000),
                (16, 0),
            ]
            for arg, x_len in batch_size:
                if len(globals_dict["X_train"]) >= x_len:
                    args["batch_size"] = arg
                    break
        else:
            args["random_state"] = globals_dict["seed"]
            args["n_jobs"] = globals_dict["n_jobs_param"]

        leftover_parameters_to_categorical_distributions(
            tune_grid, tune_distributions)

        super().__init__(
            id="svm",
            name="SVM - Linear Kernel",
            class_def=SGDClassifier,
            args=args,
            tune_grid=tune_grid,
            tune_distribution=tune_distributions,
            tune_args=tune_args,
            shap=False,
        )
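Because the last threshold in the batch_size table is 0, the descending scan in the GPU branch above is guaranteed to match; it could equally be written as a single next() expression (a rewrite, not the source):

n_rows = len(globals_dict["X_train"])
# Pick the largest batch size whose row threshold the data set meets.
args["batch_size"] = next(size for size, min_rows in batch_size if n_rows >= min_rows)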
Example 13
    def __init__(self, globals_dict: dict) -> None:
        logger = get_logger()
        np.random.seed(globals_dict["seed"])
        from lightgbm import LGBMClassifier
        from lightgbm.basic import LightGBMError

        args = {
            "random_state": globals_dict["seed"],
            "n_jobs": globals_dict["n_jobs_param"],
        }
        tune_args = {}
        tune_grid = {
            "num_leaves": [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 150, 200],
            "learning_rate":
            np_list_arange(0.001, 0.5, 0.001, inclusive=True),
            "n_estimators":
            np_list_arange(10, 300, 10, inclusive=True),
            "min_split_gain": [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
            "reg_alpha": [
                0.0000001,
                0.000001,
                0.0001,
                0.001,
                0.01,
                0.0005,
                0.005,
                0.05,
                0.1,
                0.15,
                0.2,
                0.3,
                0.4,
                0.5,
                0.7,
                1,
                2,
                3,
                4,
                5,
                10,
            ],
            "reg_lambda": [
                0.0000001,
                0.000001,
                0.0001,
                0.001,
                0.01,
                0.0005,
                0.005,
                0.05,
                0.1,
                0.15,
                0.2,
                0.3,
                0.4,
                0.5,
                0.7,
                1,
                2,
                3,
                4,
                5,
                10,
            ],
            "feature_fraction":
            np_list_arange(0.4, 1, 0.1, inclusive=True),
            "bagging_fraction":
            np_list_arange(0.4, 1, 0.1, inclusive=True),
            "bagging_freq": [1, 2, 3, 4, 5, 6, 7],
            "min_child_samples":
            np_list_arange(5, 100, 5, inclusive=True),
        }
        tune_distributions = {
            "num_leaves": IntUniformDistribution(10, 200),
            "learning_rate": UniformDistribution(0.000001, 0.5, log=True),
            "n_estimators": IntUniformDistribution(10, 300),
            "min_split_gain": UniformDistribution(0, 1),
            "reg_alpha": UniformDistribution(0.0000000001, 10, log=True),
            "reg_lambda": UniformDistribution(0.0000000001, 10, log=True),
            "min_data_in_leaf": IntUniformDistribution(10, 10000),
            "feature_fraction": UniformDistribution(0.4, 1),
            "bagging_fraction": UniformDistribution(0.4, 1),
            "bagging_freq": IntUniformDistribution(1, 7),
            "min_child_samples": IntUniformDistribution(5, 100),
        }

        leftover_parameters_to_categorical_distributions(
            tune_grid, tune_distributions)

        is_gpu_enabled = False
        if globals_dict["gpu_param"]:
            try:
                lgb = LGBMClassifier(device="gpu")
                lgb.fit(np.zeros((2, 2)), [0, 1])
                is_gpu_enabled = True
                del lgb
            except LightGBMError:
                is_gpu_enabled = False
                if globals_dict["gpu_param"] == "force":
                    raise RuntimeError(
                        "LightGBM GPU mode not available. Consult https://lightgbm.readthedocs.io/en/latest/GPU-Tutorial.html."
                    )

        if is_gpu_enabled:
            args["device"] = "gpu"

        super().__init__(
            id="lightgbm",
            name="Light Gradient Boosting Machine",
            class_def=LGBMClassifier,
            args=args,
            tune_grid=tune_grid,
            tune_distribution=tune_distributions,
            tune_args=tune_args,
            shap="type1",
            is_gpu_enabled=is_gpu_enabled,
        )
Example 14
    def __init__(self, globals_dict: dict) -> None:
        logger = get_logger()
        np.random.seed(globals_dict["seed"])
        from xgboost import XGBClassifier

        args = {
            "random_state": globals_dict["seed"],
            "n_jobs": globals_dict["n_jobs_param"],
            "verbosity": 0,
            "booster": "gbtree",
            "tree_method": "gpu_hist" if globals_dict["gpu_param"] else "auto",
        }
        tune_args = {}
        tune_grid = {
            "learning_rate":
            np_list_arange(0.001, 0.5, 0.001, inclusive=True),
            "n_estimators":
            np_list_arange(10, 300, 10, inclusive=True),
            "subsample": [0.2, 0.3, 0.5, 0.7, 0.9, 1],
            "max_depth":
            np_list_arange(1, 11, 1, inclusive=True),
            "colsample_bytree": [0.5, 0.7, 0.9, 1],
            "min_child_weight": [1, 2, 3, 4],
            "reg_alpha": [
                0.0000001,
                0.000001,
                0.0001,
                0.001,
                0.01,
                0.0005,
                0.005,
                0.05,
                0.1,
                0.15,
                0.2,
                0.3,
                0.4,
                0.5,
                0.7,
                1,
                2,
                3,
                4,
                5,
                10,
            ],
            "reg_lambda": [
                0.0000001,
                0.000001,
                0.0001,
                0.001,
                0.01,
                0.0005,
                0.005,
                0.05,
                0.1,
                0.15,
                0.2,
                0.3,
                0.4,
                0.5,
                0.7,
                1,
                2,
                3,
                4,
                5,
                10,
            ],
            "scale_pos_weight":
            np_list_arange(0, 50, 0.1, inclusive=True),
        }
        tune_distributions = {
            "learning_rate": UniformDistribution(0.000001, 0.5, log=True),
            "n_estimators": IntUniformDistribution(10, 300),
            "subsample": UniformDistribution(0.2, 1),
            "max_depth": IntUniformDistribution(1, 11),
            "colsample_bytree": UniformDistribution(0.5, 1),
            "min_child_weight": IntUniformDistribution(1, 4),
            "reg_alpha": UniformDistribution(0.0000000001, 10, log=True),
            "reg_lambda": UniformDistribution(0.0000000001, 10, log=True),
            "scale_pos_weight": UniformDistribution(1, 50),
        }

        leftover_parameters_to_categorical_distributions(
            tune_grid, tune_distributions)

        super().__init__(
            id="xgboost",
            name="Extreme Gradient Boosting",
            class_def=XGBClassifier,
            args=args,
            tune_grid=tune_grid,
            tune_distribution=tune_distributions,
            tune_args=tune_args,
            shap="type2",
            is_gpu_enabled=bool(globals_dict["gpu_param"]),
        )
Example 15
    def __init__(self, globals_dict: dict) -> None:
        logger = get_logger()
        np.random.seed(globals_dict["seed"])
        try:
            import catboost
        except ImportError:
            logger.warning("Couldn't import catboost.CatBoostClassifier")
            self.active = False
            return

        catboost_version = tuple(int(x) for x in catboost.__version__.split("."))
        if catboost_version < (0, 23, 2):
            logger.warning(
                f"Wrong catboost version. Expected catboost>=0.23.2, got catboost=={catboost.__version__}"
            )
            self.active = False
            return

        from catboost import CatBoostClassifier

        # suppress output
        logging.getLogger("catboost").setLevel(logging.ERROR)

        use_gpu = globals_dict["gpu_param"] == "force" or (
            globals_dict["gpu_param"]
            and len(globals_dict["X_train"]) >= 50000)

        args = {
            "random_state": globals_dict["seed"],
            "verbose": False,
            "thread_count": globals_dict["n_jobs_param"],
            "task_type": "GPU" if use_gpu else "CPU",
            "border_count": 32 if use_gpu else 254,
        }
        tune_args = {}
        tune_grid = {
            "depth": list(range(1, 12)),
            "n_estimators": np_list_arange(10, 300, 10, inclusive=True),
            "random_strength": np_list_arange(0, 0.8, 0.1, inclusive=True),
            "l2_leaf_reg":
            [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 50, 100, 200],
        }
        tune_distributions = {
            "depth": IntUniformDistribution(1, 11),
            "n_estimators": IntUniformDistribution(10, 300),
            "random_strength": UniformDistribution(0, 0.8),
            "l2_leaf_reg": IntUniformDistribution(1, 200, log=True),
        }

        if use_gpu:
            tune_grid["depth"] = list(range(1, 9))
            tune_distributions["depth"] = (IntUniformDistribution(1, 8), )

        leftover_parameters_to_categorical_distributions(
            tune_grid, tune_distributions)

        super().__init__(
            id="catboost",
            name="CatBoost Classifier",
            class_def=CatBoostClassifier,
            args=args,
            tune_grid=tune_grid,
            tune_distribution=tune_distributions,
            tune_args=tune_args,
            shap="type2",
            is_gpu_enabled=use_gpu,
        )
Example 16
    def __init__(self, globals_dict: dict) -> None:
        logger = get_logger()
        np.random.seed(globals_dict["seed"])
        try:
            import xgboost
        except ImportError:
            logger.warning("Couldn't import xgboost.XGBClassifier")
            self.active = False
            return

        xgboost_version = tuple(int(x) for x in xgboost.__version__.split("."))
        if xgboost_version < (1, 1, 0):
            logger.warning(
                f"Wrong xgboost version. Expected xgboost>=1.1.0, got xgboost=={xgboost.__version__}"
            )
            self.active = False
            return

        from xgboost import XGBClassifier

        args = {
            "random_state": globals_dict["seed"],
            "n_jobs": globals_dict["n_jobs_param"],
            "verbosity": 0,
            "booster": "gbtree",
            "tree_method": "gpu_hist" if globals_dict["gpu_param"] else "auto",
        }
        tune_args = {}
        tune_grid = {
            "learning_rate":
            np_list_arange(0.001, 0.5, 0.001, inclusive=True),
            "n_estimators":
            np_list_arange(10, 300, 10, inclusive=True),
            "subsample": [0.2, 0.3, 0.5, 0.7, 0.9, 1],
            "max_depth":
            np_list_arange(1, 11, 1, inclusive=True),
            "colsample_bytree": [0.5, 0.7, 0.9, 1],
            "min_child_weight": [1, 2, 3, 4],
            "reg_alpha": [
                0.0000001,
                0.000001,
                0.0001,
                0.001,
                0.01,
                0.0005,
                0.005,
                0.05,
                0.1,
                0.15,
                0.2,
                0.3,
                0.4,
                0.5,
                0.7,
                1,
                2,
                3,
                4,
                5,
                10,
            ],
            "reg_lambda": [
                0.0000001,
                0.000001,
                0.0001,
                0.001,
                0.01,
                0.0005,
                0.005,
                0.05,
                0.1,
                0.15,
                0.2,
                0.3,
                0.4,
                0.5,
                0.7,
                1,
                2,
                3,
                4,
                5,
                10,
            ],
            "scale_pos_weight":
            np_list_arange(0, 50, 0.1, inclusive=True),
        }
        tune_distributions = {
            "learning_rate": UniformDistribution(0.000001, 0.5, log=True),
            "n_estimators": IntUniformDistribution(10, 300),
            "subsample": UniformDistribution(0.2, 1),
            "max_depth": IntUniformDistribution(1, 11),
            "colsample_bytree": UniformDistribution(0.5, 1),
            "min_child_weight": IntUniformDistribution(1, 4),
            "reg_alpha": UniformDistribution(0.0000000001, 10, log=True),
            "reg_lambda": UniformDistribution(0.0000000001, 10, log=True),
            "scale_pos_weight": UniformDistribution(1, 50),
        }

        leftover_parameters_to_categorical_distributions(
            tune_grid, tune_distributions)

        super().__init__(
            id="xgboost",
            name="Extreme Gradient Boosting",
            class_def=XGBClassifier,
            args=args,
            tune_grid=tune_grid,
            tune_distribution=tune_distributions,
            tune_args=tune_args,
            shap="type2",
            is_gpu_enabled=bool(globals_dict["gpu_param"]),
        )
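All sixteen containers follow the same contract: seed NumPy, import the estimator lazily, assemble args/tune_grid/tune_distributions, and hand everything to the shared base class. A hypothetical consumer might look like this (the container class name, the stored attribute names, and the globals_dict contents are assumptions based on the keyword arguments above):

globals_dict = {
    "seed": 42,
    "n_jobs_param": -1,
    "gpu_param": False,
    "X_train": X_train,  # training features, defined elsewhere
}

container = XGBClassifierContainer(globals_dict)   # hypothetical class name
estimator = container.class_def(**container.args)  # e.g. XGBClassifier(random_state=42, ...)
estimator.fit(X_train, y_train)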