Beispiel #1
0
def _create_container_azure(container_name: str):
    """
    Creates a storage container on Azure Platform. gets the connection string from the environment variables.

    Example
    -------
    >>>  container_client = _create_container_azure(container_name='test-pycaret-azure')

    Parameters
    ----------
    container_name : str
        Name of the storage container to be created if does not exists already.

    Returns
    -------
    cotainer_client

    """

    logger = get_logger()

    # Create the container
    import os, uuid
    from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient

    connect_str = os.getenv("AZURE_STORAGE_CONNECTION_STRING")
    blob_service_client = BlobServiceClient.from_connection_string(connect_str)
    container_client = blob_service_client.create_container(container_name)
    return container_client
Beispiel #2
0
    def __init__(self, globals_dict: dict) -> None:
        logger = get_logger()
        np.random.seed(globals_dict["seed"])
        from sklearn.ensemble import BaggingClassifier

        args = {
            "random_state": globals_dict["seed"],
            "n_jobs": 1 if globals_dict["gpu_param"] else None,
        }
        tune_args = {}
        tune_grid = {
            "bootstrap": [True, False],
            "bootstrap_features": [True, False],
            "max_features": np_list_arange(0.4, 1, 0.1, inclusive=True),
            "max_samples": np_list_arange(0.4, 1, 0.1, inclusive=True),
        }
        tune_distributions = {
            "max_features": UniformDistribution(0.4, 1),
            "max_samples": UniformDistribution(0.4, 1),
        }

        leftover_parameters_to_categorical_distributions(tune_grid, tune_distributions)

        super().__init__(
            id="Bagging",
            name="Bagging Classifier",
            class_def=BaggingClassifier,
            args=args,
            tune_grid=tune_grid,
            tune_distribution=tune_distributions,
            tune_args=tune_args,
            shap=False,
            is_special=True,
            is_gpu_enabled=False,
        )
Beispiel #3
0
    def __init__(self, globals_dict: dict) -> None:
        logger = get_logger()
        np.random.seed(globals_dict["seed"])
        from sklearn.gaussian_process import GaussianProcessClassifier

        args = {
            "copy_X_train": False,
            "random_state": globals_dict["seed"],
            "n_jobs": globals_dict["n_jobs_param"],
        }
        tune_args = {}
        tune_grid = {
            "max_iter_predict": [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000,]
        }
        tune_distributions = {"max_iter_predict": IntUniformDistribution(100, 1000)}

        super().__init__(
            id="gpc",
            name="Gaussian Process Classifier",
            class_def=GaussianProcessClassifier,
            args=args,
            tune_grid=tune_grid,
            tune_distribution=tune_distributions,
            tune_args=tune_args,
            shap=False,
            is_turbo=False,
        )
Beispiel #4
0
    def __init__(self, globals_dict: dict) -> None:
        logger = get_logger()
        np.random.seed(globals_dict["seed"])
        from sklearn.calibration import CalibratedClassifierCV

        args = {}
        tune_args = {}
        tune_grid = {}
        tune_distributions = {}

        leftover_parameters_to_categorical_distributions(
            tune_grid, tune_distributions)

        super().__init__(
            id="CalibratedCV",
            name="Calibrated Classifier CV",
            class_def=CalibratedClassifierCV,
            args=args,
            tune_grid=tune_grid,
            tune_distribution=tune_distributions,
            tune_args=tune_args,
            shap=False,
            is_special=True,
            is_gpu_enabled=False,
        )
Beispiel #5
0
    def __init__(self, globals_dict: dict) -> None:
        logger = get_logger()
        np.random.seed(globals_dict["seed"])
        from sklearn.ensemble import AdaBoostClassifier

        args = {"random_state": globals_dict["seed"]}
        tune_args = {}
        tune_grid = {
            "n_estimators": np_list_arange(10, 300, 10, inclusive=True),
            "learning_rate": np_list_arange(0.001, 0.5, 0.001, inclusive=True),
            "algorithm": ["SAMME", "SAMME.R"],
        }
        tune_distributions = {
            "n_estimators": IntUniformDistribution(10, 300),
            "learning_rate": UniformDistribution(0.000001, 0.5, log=True),
        }

        leftover_parameters_to_categorical_distributions(
            tune_grid, tune_distributions)

        super().__init__(
            id="ada",
            name="Ada Boost Classifier",
            class_def=AdaBoostClassifier,
            args=args,
            tune_grid=tune_grid,
            tune_distribution=tune_distributions,
            tune_args=tune_args,
            shap=False,
        )
Beispiel #6
0
def _create_bucket_gcp(project_name: str, bucket_name: str):
    """
    Creates a bucket on Google Cloud Platform if it does not exists already

    Example
    -------
    >>> _create_bucket_gcp(project_name='GCP-Essentials', bucket_name='test-pycaret-gcp')

    Parameters
    ----------
    project_name : str
        A Project name on GCP Platform (Must have been created from console).

    bucket_name : str
        Name of the storage bucket to be created if does not exists already.

    Returns
    -------
    None
    """

    logger = get_logger()

    # bucket_name = "your-new-bucket-name"
    from google.cloud import storage

    storage_client = storage.Client(project_name)

    buckets = storage_client.list_buckets()

    if bucket_name not in buckets:
        bucket = storage_client.create_bucket(bucket_name)
        logger.info("Bucket {} created".format(bucket.name))
    else:
        raise FileExistsError("{} already exists".format(bucket_name))
Beispiel #7
0
def _upload_blob_gcp(
    project_name: str,
    bucket_name: str,
    source_file_name: str,
    destination_blob_name: str,
):
    """
    Upload blob to GCP storage bucket

    Example
    -------
    >>> _upload_blob_gcp(project_name='GCP-Essentials', bucket_name='test-pycaret-gcp', \
                        source_file_name='model-101.pkl', destination_blob_name='model-101.pkl')

    Parameters
    ----------
    project_name : str
        A Project name on GCP Platform (Must have been created from console).

    bucket_name : str
        Name of the storage bucket to be created if does not exists already.

    source_file_name : str
        A blob/file name to copy to GCP

    destination_blob_name : str
        Name of the destination file to be stored on GCP

    Returns
    -------
    None
    """

    logger = get_logger()

    # bucket_name = "your-bucket-name"
    # source_file_name = "local/path/to/file"
    # destination_blob_name = "storage-object-name"
    from google.cloud import storage

    import google.auth.exceptions
    try:
        storage_client = storage.Client(project_name)
    except google.auth.exceptions.DefaultCredentialsError:
        logger.error(
            'Environment variable GOOGLE_APPLICATION_CREDENTIALS not set. For more information,'
            ' please see https://cloud.google.com/docs/authentication/getting-started'
        )
        raise ValueError(
            'Environment variable GOOGLE_APPLICATION_CREDENTIALS not set. For more information,'
            ' please see https://cloud.google.com/docs/authentication/getting-started'
        )

    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(destination_blob_name)

    blob.upload_from_filename(source_file_name)

    logger.info("File {} uploaded to {}.".format(source_file_name,
                                                 destination_blob_name))
Beispiel #8
0
    def __init__(self, globals_dict: dict) -> None:
        logger = get_logger()
        np.random.seed(globals_dict["seed"])
        from pycaret.internal.tunable import TunableVotingClassifier as VotingClassifier

        args = {}
        tune_args = {}
        tune_grid = {}
        tune_distributions = {}

        # VotingClassifier is a special case. Its weights can be tuned, but we do not know how many of them will be there
        # before it is initiated. Therefore, code to handle it will be added directly to tune_model().

        leftover_parameters_to_categorical_distributions(
            tune_grid, tune_distributions)

        super().__init__(
            id="Voting",
            name="Voting Classifier",
            class_def=VotingClassifier,
            args=args,
            tune_grid=tune_grid,
            tune_distribution=tune_distributions,
            tune_args=tune_args,
            shap=False,
            is_special=True,
            is_gpu_enabled=False,
        )
Beispiel #9
0
    def __init__(self, globals_dict: dict) -> None:
        logger = get_logger()
        np.random.seed(globals_dict["seed"])
        from sklearn.ensemble import ExtraTreesClassifier

        args = {
            "random_state": globals_dict["seed"],
            "n_jobs": globals_dict["n_jobs_param"],
        }
        tune_args = {}
        tune_grid = {
            "n_estimators": np_list_arange(10, 300, 10, inclusive=True),
            "criterion": ["gini", "entropy"],
            "max_depth": np_list_arange(1, 11, 1, inclusive=True),
            "min_impurity_decrease": [
                0,
                0.0001,
                0.001,
                0.01,
                0.0002,
                0.002,
                0.02,
                0.0005,
                0.005,
                0.05,
                0.1,
                0.2,
                0.3,
                0.4,
                0.5,
            ],
            "max_features": [1.0, "sqrt", "log2"],
            "bootstrap": [True, False],
            "min_samples_split": [2, 5, 7, 9, 10],
            "min_samples_leaf": [2, 3, 4, 5, 6],
            "class_weight": ["balanced", "balanced_subsample", {}],
        }
        tune_distributions = {
            "n_estimators": IntUniformDistribution(10, 300),
            "max_depth": IntUniformDistribution(1, 11),
            "min_samples_split": IntUniformDistribution(2, 10),
            "min_samples_leaf": IntUniformDistribution(1, 5),
            "max_features": UniformDistribution(0.4, 1),
            "min_impurity_decrease": UniformDistribution(0.000000001, 0.5, log=True),
        }

        leftover_parameters_to_categorical_distributions(tune_grid, tune_distributions)

        super().__init__(
            id="et",
            name="Extra Trees Classifier",
            class_def=ExtraTreesClassifier,
            args=args,
            tune_grid=tune_grid,
            tune_distribution=tune_distributions,
            tune_args=tune_args,
            shap="type1",
        )
Beispiel #10
0
    def __init__(self, globals_dict: dict) -> None:
        logger = get_logger()
        np.random.seed(globals_dict["seed"])
        from sklearn.tree import DecisionTreeClassifier

        args = {"random_state": globals_dict["seed"]}
        tune_args = {}
        tune_grid = {
            "max_depth":
            np_list_arange(1, 16, 1, inclusive=True),
            "max_features": [1.0, "sqrt", "log2"],
            "min_samples_leaf": [2, 3, 4, 5, 6],
            "min_samples_split": [2, 5, 7, 9, 10],
            "criterion": ["gini", "entropy"],
            "min_impurity_decrease": [
                0,
                0.0001,
                0.001,
                0.01,
                0.0002,
                0.002,
                0.02,
                0.0005,
                0.005,
                0.05,
                0.1,
                0.2,
                0.3,
                0.4,
                0.5,
            ],
        }
        tune_distributions = {
            "max_depth":
            IntUniformDistribution(1, 16),
            "max_features":
            UniformDistribution(0.4, 1),
            "min_samples_leaf":
            IntUniformDistribution(2, 6),
            "min_samples_split":
            IntUniformDistribution(2, 10),
            "min_impurity_decrease":
            UniformDistribution(0.000000001, 0.5, log=True),
        }

        leftover_parameters_to_categorical_distributions(
            tune_grid, tune_distributions)

        super().__init__(
            id="dt",
            name="Decision Tree Classifier",
            class_def=DecisionTreeClassifier,
            args=args,
            tune_grid=tune_grid,
            tune_distribution=tune_distributions,
            tune_args=tune_args,
            shap="type1",
        )
Beispiel #11
0
    def __init__(self, globals_dict: dict) -> None:
        logger = get_logger()
        np.random.seed(globals_dict["seed"])
        from sklearn.ensemble import GradientBoostingClassifier

        args = {"random_state": globals_dict["seed"]}
        tune_args = {}
        tune_grid = {
            "n_estimators": np_list_arange(10, 300, 10, inclusive=True),
            "learning_rate": np_list_arange(0.001, 0.5, 0.001, inclusive=True),
            "subsample": np_list_arange(0.2, 1, 0.05, inclusive=True),
            "min_samples_split": [2, 4, 5, 7, 9, 10],
            "min_samples_leaf": [1, 2, 3, 4, 5],
            "max_depth": np_list_arange(1, 11, 1, inclusive=True),
            "min_impurity_decrease": [
                0,
                0.0001,
                0.001,
                0.01,
                0.0002,
                0.002,
                0.02,
                0.0005,
                0.005,
                0.05,
                0.1,
                0.2,
                0.3,
                0.4,
                0.5,
            ],
            "max_features": [1.0, "sqrt", "log2"],
        }
        tune_distributions = {
            "n_estimators": IntUniformDistribution(10, 300),
            "learning_rate": UniformDistribution(0.000001, 0.5, log=True),
            "subsample": UniformDistribution(0.2, 1),
            "min_samples_split": IntUniformDistribution(2, 10),
            "min_samples_leaf": IntUniformDistribution(1, 5),
            "max_depth": IntUniformDistribution(1, 11),
            "min_impurity_decrease": UniformDistribution(0.000000001, 0.5, log=True),
            "max_features": UniformDistribution(0.4, 1),
        }

        leftover_parameters_to_categorical_distributions(tune_grid, tune_distributions)

        super().__init__(
            id="gbc",
            name="Gradient Boosting Classifier",
            class_def=GradientBoostingClassifier,
            args=args,
            tune_grid=tune_grid,
            tune_distribution=tune_distributions,
            tune_args=tune_args,
            shap=False,
        )
Beispiel #12
0
    def __init__(self, globals_dict: dict) -> None:
        logger = get_logger()
        np.random.seed(globals_dict["seed"])
        gpu_imported = False

        from sklearn.linear_model import RidgeClassifier

        if globals_dict["gpu_param"] == "force":
            import cuml.linear_model

            logger.info("Imported cuml.linear_model")
            gpu_imported = True
        elif globals_dict["gpu_param"]:
            try:
                import cuml.linear_model

                logger.info("Imported cuml.linear_model")
                gpu_imported = True
            except ImportError:
                logger.warning("Couldn't import cuml.linear_model")

        args = {}
        tune_args = {}
        tune_grid = {}
        tune_distributions = {}

        if gpu_imported:
            RidgeClassifier = pycaret.internal.cuml_wrappers.get_ridge_classifier(
            )
        else:
            args = {"random_state": globals_dict["seed"]}

        tune_grid = {
            "normalize": [True, False],
        }

        tune_grid["alpha"] = np_list_arange(0.01, 10, 0.01, inclusive=False)
        tune_grid["fit_intercept"] = [True, False]
        tune_distributions["alpha"] = UniformDistribution(0.001, 10)

        leftover_parameters_to_categorical_distributions(
            tune_grid, tune_distributions)

        super().__init__(
            id="ridge",
            name="Ridge Classifier",
            class_def=RidgeClassifier,
            args=args,
            tune_grid=tune_grid,
            tune_distribution=tune_distributions,
            tune_args=tune_args,
            shap=False,
            is_gpu_enabled=gpu_imported,
        )
        if gpu_imported:
            self.reference = get_class_name(cuml.linear_model.Ridge)
Beispiel #13
0
    def __init__(self, globals_dict: dict) -> None:
        logger = get_logger()
        np.random.seed(globals_dict["seed"])
        from sklearn.neural_network import MLPClassifier
        from pycaret.internal.tunable import TunableMLPClassifier

        args = {"random_state": globals_dict["seed"], "max_iter": 500}
        tune_args = {}
        tune_grid = {
            "learning_rate": ["constant", "invscaling", "adaptive"],
            "alpha": [
                0.0000001,
                0.000001,
                0.0001,
                0.001,
                0.01,
                0.0005,
                0.005,
                0.05,
                0.1,
                0.15,
                0.2,
                0.3,
                0.4,
                0.5,
                0.7,
                0.9,
            ],
            "hidden_layer_size_0": [50, 100],
            "hidden_layer_size_1": [0, 50, 100],
            "hidden_layer_size_2": [0, 50, 100],
            "activation": ["tanh", "identity", "logistic", "relu"],
        }
        tune_distributions = {
            "alpha": UniformDistribution(0.0000000001, 0.9999999999, log=True),
            "hidden_layer_size_0": IntUniformDistribution(50, 100),
            "hidden_layer_size_1": IntUniformDistribution(0, 100),
            "hidden_layer_size_2": IntUniformDistribution(0, 100),
        }

        leftover_parameters_to_categorical_distributions(
            tune_grid, tune_distributions)

        super().__init__(
            id="mlp",
            name="MLP Classifier",
            class_def=MLPClassifier,
            args=args,
            tune_grid=tune_grid,
            tune_distribution=tune_distributions,
            tune_args=tune_args,
            shap=False,
            is_turbo=False,
            tunable=TunableMLPClassifier,
        )
Beispiel #14
0
    def __init__(self, globals_dict: dict) -> None:
        logger = get_logger()
        np.random.seed(globals_dict["seed"])
        gpu_imported = False

        from sklearn.svm import SVC

        if globals_dict["gpu_param"] == "force":
            from cuml.svm import SVC

            logger.info("Imported cuml.svm.SVC")
            gpu_imported = True
        elif globals_dict["gpu_param"]:
            try:
                from cuml.svm import SVC

                logger.info("Imported cuml.svm.SVC")
                gpu_imported = True
            except ImportError:
                logger.warning("Couldn't import cuml.svm.SVC")

        args = {
            "gamma": "auto",
            "C": 1.0,
            "probability": True,
            "kernel": "rbf",
            "random_state": globals_dict["seed"],
        }
        tune_args = {}
        tune_grid = {
            "C": np_list_arange(0, 50, 0.01, inclusive=True),
            "class_weight": ["balanced", {}],
        }
        tune_distributions = {
            "C": UniformDistribution(0, 50),
        }

        leftover_parameters_to_categorical_distributions(
            tune_grid, tune_distributions)

        if gpu_imported:
            SVC = get_svc_classifier()

        super().__init__(
            id="rbfsvm",
            name="SVM - Radial Kernel",
            class_def=SVC,
            args=args,
            tune_grid=tune_grid,
            tune_distribution=tune_distributions,
            tune_args=tune_args,
            shap=False,
            is_turbo=False,
        )
Beispiel #15
0
def _download_blob_gcp(
    project_name: str,
    bucket_name: str,
    source_blob_name: str,
    destination_file_name: str,
):
    """
    Download a blob from GCP storage bucket

    Example
    -------
    >>> _download_blob_gcp(project_name='GCP-Essentials', bucket_name='test-pycaret-gcp', \
                          source_blob_name='model-101.pkl', destination_file_name='model-101.pkl')

    Parameters
    ----------
    project_name : str
        A Project name on GCP Platform (Must have been created from console).

    bucket_name : str
        Name of the storage bucket to be created if does not exists already.

    source_blob_name : str
        A blob/file name to download from GCP bucket

    destination_file_name : str
        Name of the destination file to be stored locally

    Returns
    -------
    Model Object
    """

    logger = get_logger()

    # bucket_name = "your-bucket-name"
    # source_blob_name = "storage-object-name"
    # destination_file_name = "local/path/to/file"
    from google.cloud import storage

    storage_client = storage.Client(project_name)

    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(source_blob_name)

    if destination_file_name is not None:
        blob.download_to_filename(destination_file_name)

        logger.info(
            "Blob {} downloaded to {}.".format(source_blob_name, destination_file_name)
        )

    return blob
Beispiel #16
0
    def __init__(self, globals_dict: dict) -> None:
        logger = get_logger()
        np.random.seed(globals_dict["seed"])
        from sklearn.naive_bayes import GaussianNB

        args = {}
        tune_args = {}
        tune_grid = {
            "var_smoothing": [
                0.000000001,
                0.000000002,
                0.000000005,
                0.000000008,
                0.000000009,
                0.0000001,
                0.0000002,
                0.0000003,
                0.0000005,
                0.0000007,
                0.0000009,
                0.00001,
                0.001,
                0.002,
                0.003,
                0.004,
                0.005,
                0.007,
                0.009,
                0.004,
                0.005,
                0.006,
                0.007,
                0.008,
                0.009,
                0.01,
                0.1,
                1,
            ]
        }
        tune_distributions = {
            "var_smoothing": UniformDistribution(0.000000001, 1, log=True)
        }

        super().__init__(
            id="nb",
            name="Naive Bayes",
            class_def=GaussianNB,
            args=args,
            tune_grid=tune_grid,
            tune_distribution=tune_distributions,
            tune_args=tune_args,
            shap=False,
        )
Beispiel #17
0
    def __init__(self, globals_dict: dict) -> None:
        logger = get_logger()
        np.random.seed(globals_dict["seed"])
        from catboost import CatBoostClassifier

        # suppress output
        logging.getLogger("catboost").setLevel(logging.ERROR)

        use_gpu = globals_dict["gpu_param"] == "force" or (
            globals_dict["gpu_param"]
            and len(globals_dict["X_train"]) >= 50000)

        args = {
            "random_state": globals_dict["seed"],
            "verbose": False,
            "thread_count": globals_dict["n_jobs_param"],
            "task_type": "GPU" if use_gpu else "CPU",
            "border_count": 32 if use_gpu else 254,
        }
        tune_args = {}
        tune_grid = {
            "depth": list(range(1, 12)),
            "n_estimators": np_list_arange(10, 300, 10, inclusive=True),
            "random_strength": np_list_arange(0, 0.8, 0.1, inclusive=True),
            "l2_leaf_reg":
            [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 50, 100, 200],
        }
        tune_distributions = {
            "depth": IntUniformDistribution(1, 11),
            "n_estimators": IntUniformDistribution(10, 300),
            "random_strength": UniformDistribution(0, 0.8),
            "l2_leaf_reg": IntUniformDistribution(1, 200, log=True),
        }

        if use_gpu:
            tune_grid["depth"] = list(range(1, 9))
            tune_distributions["depth"] = (IntUniformDistribution(1, 8), )

        leftover_parameters_to_categorical_distributions(
            tune_grid, tune_distributions)

        super().__init__(
            id="catboost",
            name="CatBoost Classifier",
            class_def=CatBoostClassifier,
            args=args,
            tune_grid=tune_grid,
            tune_distribution=tune_distributions,
            tune_args=tune_args,
            shap="type2",
            is_gpu_enabled=use_gpu,
        )
Beispiel #18
0
    def __init__(self, globals_dict: dict) -> None:
        logger = get_logger()
        np.random.seed(globals_dict["seed"])
        gpu_imported = False

        from sklearn.linear_model import LogisticRegression

        if globals_dict["gpu_param"] == "force":
            from cuml.linear_model import LogisticRegression

            logger.info("Imported cuml.linear_model.LogisticRegression")
            gpu_imported = True
        elif globals_dict["gpu_param"]:
            try:
                from cuml.linear_model import LogisticRegression

                logger.info("Imported cuml.linear_model.LogisticRegression")
                gpu_imported = True
            except ImportError:
                logger.warning(
                    "Couldn't import cuml.linear_model.LogisticRegression")

        args = {"max_iter": 1000}
        tune_args = {}
        tune_grid = {}
        tune_distributions = {}

        # common
        tune_grid["penalty"] = ["l2", "none"]
        tune_grid["C"] = np_list_arange(0, 10, 0.001, inclusive=True)

        if gpu_imported:
            tune_grid["penalty"] += ["l1"]
        else:
            args["random_state"] = globals_dict["seed"]

            tune_grid["class_weight"] = ["balanced", {}]

        tune_distributions["C"] = UniformDistribution(0, 10)
        leftover_parameters_to_categorical_distributions(
            tune_grid, tune_distributions)

        super().__init__(
            id="lr",
            name="Logistic Regression",
            class_def=LogisticRegression,
            args=args,
            tune_grid=tune_grid,
            tune_distribution=tune_distributions,
            tune_args=tune_args,
            shap=False,
        )
Beispiel #19
0
def _upload_blob_gcp(
    project_name: str,
    bucket_name: str,
    source_file_name: str,
    destination_blob_name: str,
):

    """
    Upload blob to GCP storage bucket

    Example
    -------
    >>> _upload_blob_gcp(project_name='GCP-Essentials', bucket_name='test-pycaret-gcp', \
                        source_file_name='model-101.pkl', destination_blob_name='model-101.pkl')

    Parameters
    ----------
    project_name : str
        A Project name on GCP Platform (Must have been created from console).

    bucket_name : str
        Name of the storage bucket to be created if does not exists already.

    source_file_name : str
        A blob/file name to copy to GCP

    destination_blob_name : str
        Name of the destination file to be stored on GCP

    Returns
    -------
    None
    """

    logger = get_logger()

    # bucket_name = "your-bucket-name"
    # source_file_name = "local/path/to/file"
    # destination_blob_name = "storage-object-name"
    from google.cloud import storage

    storage_client = storage.Client(project_name)
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(destination_blob_name)

    blob.upload_from_filename(source_file_name)

    logger.info(
        "File {} uploaded to {}.".format(source_file_name, destination_blob_name)
    )
Beispiel #20
0
    def __init__(self, globals_dict: dict) -> None:
        logger = get_logger()
        np.random.seed(globals_dict["seed"])
        gpu_imported = False

        from sklearn.neighbors import KNeighborsClassifier

        if globals_dict["gpu_param"] == "force":
            from cuml.neighbors import KNeighborsClassifier

            logger.info("Imported cuml.neighbors.KNeighborsClassifier")
            gpu_imported = True
        elif globals_dict["gpu_param"]:
            try:
                from cuml.neighbors import KNeighborsClassifier

                logger.info("Imported cuml.neighbors.KNeighborsClassifier")
                gpu_imported = True
            except ImportError:
                logger.warning(
                    "Couldn't import cuml.neighbors.KNeighborsClassifier")

        args = {}
        tune_args = {}
        tune_grid = {}
        tune_distributions = {}

        # common
        tune_grid["n_neighbors"] = range(1, 51)
        tune_grid["weights"] = ["uniform"]
        tune_grid["metric"] = ["minkowski", "euclidean", "manhattan"]

        if not gpu_imported:
            args["n_jobs"] = globals_dict["n_jobs_param"]
            tune_grid["weights"] += ["distance"]

        tune_distributions["n_neighbors"] = IntUniformDistribution(1, 51)
        leftover_parameters_to_categorical_distributions(
            tune_grid, tune_distributions)

        super().__init__(
            id="knn",
            name="K Neighbors Classifier",
            class_def=KNeighborsClassifier,
            args=args,
            tune_grid=tune_grid,
            tune_distribution=tune_distributions,
            tune_args=tune_args,
            shap=False,
        )
Beispiel #21
0
    def __init__(self, globals_dict: dict) -> None:
        logger = get_logger()
        np.random.seed(globals_dict["seed"])
        from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

        args = {}
        tune_args = {}
        tune_grid = {
            "solver": ["lsqr", "eigen"],
            "shrinkage": [
                "empirical",
                "auto",
                0.0001,
                0.001,
                0.01,
                0.0005,
                0.005,
                0.05,
                0.1,
                0.2,
                0.3,
                0.4,
                0.5,
                0.6,
                0.7,
                0.8,
                0.9,
                1,
            ],
        }
        tune_distributions = {
            "shrinkage": UniformDistribution(0.0001, 1, log=True),
        }

        leftover_parameters_to_categorical_distributions(
            tune_grid, tune_distributions)

        super().__init__(
            id="lda",
            name="Linear Discriminant Analysis",
            class_def=LinearDiscriminantAnalysis,
            args=args,
            tune_grid=tune_grid,
            tune_distribution=tune_distributions,
            tune_args=tune_args,
            shap=False,
        )
Beispiel #22
0
def _create_bucket_gcp(project_name: str, bucket_name: str):
    """
    Creates a bucket on Google Cloud Platform if it does not exists already

    Example
    -------
    >>> _create_bucket_gcp(project_name='GCP-Essentials', bucket_name='test-pycaret-gcp')

    Parameters
    ----------
    project_name : str
        A Project name on GCP Platform (Must have been created from console).

    bucket_name : str
        Name of the storage bucket to be created if does not exists already.

    Returns
    -------
    None
    """

    logger = get_logger()

    # bucket_name = "your-new-bucket-name"
    from google.cloud import storage

    import google.auth.exceptions
    try:
        storage_client = storage.Client(project_name)
    except google.auth.exceptions.DefaultCredentialsError:
        logger.error(
            'Environment variable GOOGLE_APPLICATION_CREDENTIALS not set. For more information,'
            ' please see https://cloud.google.com/docs/authentication/getting-started'
        )
        raise ValueError(
            'Environment variable GOOGLE_APPLICATION_CREDENTIALS not set. For more information,'
            ' please see https://cloud.google.com/docs/authentication/getting-started'
        )

    buckets = storage_client.list_buckets()

    if bucket_name not in buckets:
        bucket = storage_client.create_bucket(bucket_name)
        logger.info("Bucket {} created".format(bucket.name))
    else:
        raise FileExistsError("{} already exists".format(bucket_name))
Beispiel #23
0
def _download_blob_azure(container_name: str, source_blob_name: str,
                         destination_file_name: str):
    """
    Download blob from Azure storage  container

    Example
    -------
    >>>  _download_blob_azure(container_name='test-pycaret-azure', source_blob_name='model-101.pkl', \
                             destination_file_name='model-101.pkl')

    Parameters
    ----------
    container_name : str
        Name of the storage bucket to be created if does not exists already.

    source_blob_name : str
        A blob/file name to download from Azure storage container

    destination_file_name : str
        Name of the destination file to be stored locally

    """

    logger = get_logger()

    import os
    from azure.storage.blob import BlobServiceClient

    connect_str = os.getenv("AZURE_STORAGE_CONNECTION_STRING")

    if connect_str is None:
        logger.error(
            'Environment variable AZURE_STORAGE_CONNECTION_STRING not set')
        raise ValueError(
            'Environment variable AZURE_STORAGE_CONNECTION_STRING not set')

    blob_service_client = BlobServiceClient.from_connection_string(connect_str)
    # Create a blob client using the local file name as the name for the blob
    blob_client = blob_service_client.get_blob_client(container=container_name,
                                                      blob=source_blob_name)

    if destination_file_name is not None:
        with open(destination_file_name, "wb") as download_file:
            download_file.write(blob_client.download_blob().readall())
Beispiel #24
0
def _upload_blob_azure(container_name: str, source_file_name: str,
                       destination_blob_name: str):
    """
    Upload blob to Azure storage  container

    Example
    -------
    >>>  _upload_blob_azure(container_name='test-pycaret-azure', source_file_name='model-101.pkl', \
                           destination_blob_name='model-101.pkl')

    Parameters
    ----------
    container_name : str
        Name of the storage bucket to be created if does not exists already.

    source_file_name : str
        A blob/file name to copy to Azure

    destination_blob_name : str
        Name of the destination file to be stored on Azure

    """

    logger = get_logger()

    import os
    from azure.storage.blob import BlobServiceClient

    connect_str = os.getenv("AZURE_STORAGE_CONNECTION_STRING")

    if connect_str is None:
        logger.error(
            'Environment variable AZURE_STORAGE_CONNECTION_STRING not set')
        raise ValueError(
            'Environment variable AZURE_STORAGE_CONNECTION_STRING not set')

    blob_service_client = BlobServiceClient.from_connection_string(connect_str)
    # Create a blob client using the local file name as the name for the blob
    blob_client = blob_service_client.get_blob_client(
        container=container_name, blob=destination_blob_name)

    # Upload the created file
    with open(source_file_name, "rb") as data:
        blob_client.upload_blob(data, overwrite=True)
Beispiel #25
0
    def __init__(self, globals_dict: dict) -> None:
        logger = get_logger()
        np.random.seed(globals_dict["seed"])
        from pyod.models.sod import SOD

        args = {}
        tune_args = {}
        tune_grid = {}
        tune_distributions = {}

        super().__init__(
            id="sod",
            name="Subspace Outlier Detection",
            class_def=SOD,
            args=args,
            tune_grid=tune_grid,
            tune_distribution=tune_distributions,
            tune_args=tune_args,
        )
Beispiel #26
0
    def __init__(self, globals_dict: dict) -> None:
        logger = get_logger()
        np.random.seed(globals_dict["seed"])
        from pyod.models.sos import SOS

        args = {}
        tune_args = {}
        tune_grid = {}
        tune_distributions = {}

        super().__init__(
            id="sos",
            name="Stochastic Outlier Selection",
            class_def=SOS,
            args=args,
            tune_grid=tune_grid,
            tune_distribution=tune_distributions,
            tune_args=tune_args,
        )
Beispiel #27
0
    def __init__(self, globals_dict: dict) -> None:
        logger = get_logger()
        np.random.seed(globals_dict["seed"])
        from sklearn.cluster import Birch

        args = {"n_clusters": _DEFAULT_N_CLUSTERS}
        tune_args = {}
        tune_grid = {}
        tune_distributions = {}

        super().__init__(
            id="birch",
            name="Birch Clustering",
            class_def=Birch,
            args=args,
            tune_grid=tune_grid,
            tune_distribution=tune_distributions,
            tune_args=tune_args,
        )
Beispiel #28
0
    def __init__(self, globals_dict: dict) -> None:
        logger = get_logger()
        np.random.seed(globals_dict["seed"])
        from sklearn.cluster import OPTICS

        args = {"n_jobs": globals_dict["n_jobs_param"]}
        tune_args = {}
        tune_grid = {}
        tune_distributions = {}

        super().__init__(
            id="optics",
            name="OPTICS Clustering",
            class_def=OPTICS,
            args=args,
            tune_grid=tune_grid,
            tune_distribution=tune_distributions,
            tune_args=tune_args,
        )
Beispiel #29
0
    def __init__(self, globals_dict: dict) -> None:
        logger = get_logger()
        np.random.seed(globals_dict["seed"])
        gpu_imported = False
        from sklearn.cluster import KMeans

        if globals_dict["gpu_param"] == "force":
            from cuml.cluster import KMeans

            logger.info("Imported cuml.cluster.KMeans")
            gpu_imported = True
        elif globals_dict["gpu_param"]:
            try:
                from cuml.cluster import KMeans

                logger.info("Imported cuml.cluster.KMeans")
                gpu_imported = True
            except ImportError:
                logger.warning("Couldn't import cuml.cluster.KMeans")

        args = {
            "n_clusters": _DEFAULT_N_CLUSTERS,
            "random_state": globals_dict["seed"],
        }
        tune_args = {}
        tune_grid = {}
        tune_distributions = {}

        if not gpu_imported:
            args["n_jobs"] = globals_dict["n_jobs_param"]
        else:
            KMeans = get_kmeans()

        super().__init__(
            id="kmeans",
            name="K-Means Clustering",
            class_def=KMeans,
            args=args,
            tune_grid=tune_grid,
            tune_distribution=tune_distributions,
            tune_args=tune_args,
            is_gpu_enabled=gpu_imported,
        )
Beispiel #30
0
    def __init__(self, globals_dict: dict) -> None:
        logger = get_logger()
        np.random.seed(globals_dict["seed"])
        from sklearn.cluster import AffinityPropagation

        args = {}
        tune_args = {}
        tune_grid = {}
        tune_distributions = {}

        super().__init__(
            id="ap",
            name="Affinity Propagation",
            class_def=AffinityPropagation,
            args=args,
            tune_grid=tune_grid,
            tune_distribution=tune_distributions,
            tune_args=tune_args,
        )