Example #1
def _auto_patch_spark():
    import os
    import logging

    # Attach a usage logger.
    logger_module = os.getenv("KOALAS_USAGE_LOGGER", "")
    if logger_module != "":
        try:
            from pyspark.pandas import usage_logging

            usage_logging.attach(logger_module)
        except Exception as e:
            logger = logging.getLogger("pyspark.pandas.usage_logger")
            logger.warning(
                "Tried to attach usage logger `{}`, but an exception was raised: {}"
                .format(logger_module, str(e)))

    # Autopatching is on by default.
    autopatch = os.getenv("SPARK_KOALAS_AUTOPATCH", "true")
    if autopatch.lower() in ("true", "1", "enabled"):
        logger = logging.getLogger("spark")
        logger.info(
            "Patching spark automatically. You can disable it by setting "
            "SPARK_KOALAS_AUTOPATCH=false in your environment")

        from pyspark.sql import dataframe as df

        # `DataFrame` here is the pandas-on-Spark DataFrame class, imported at
        # module scope in the original source file (not shown in this excerpt).
        df.DataFrame.to_koalas = DataFrame.to_koalas
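
For context, here is how the two hooks in Example #1 are meant to be driven. This is a minimal sketch, assuming the usual packaging where _auto_patch_spark() runs as a side effect of importing the package; pyspark.pandas.usage_logger is the example logger module bundled with pyspark.

import os

# Both variables must be set before the first import, because
# _auto_patch_spark() runs as an import-time side effect.
os.environ["KOALAS_USAGE_LOGGER"] = "pyspark.pandas.usage_logger"  # bundled example logger
os.environ["SPARK_KOALAS_AUTOPATCH"] = "false"  # opt out of the to_koalas patch

import pyspark.pandas  # noqa: F401  (imported for its side effects)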
Example #2
def _auto_patch_spark() -> None:
    import os
    import logging

    # Attach a usage logger.
    logger_module = os.getenv("KOALAS_USAGE_LOGGER", "")
    if logger_module != "":
        try:
            from pyspark.pandas import usage_logging

            usage_logging.attach(logger_module)
        except Exception as e:
            logger = logging.getLogger("pyspark.pandas.usage_logger")
            logger.warning(
                "Tried to attach usage logger `{}`, but an exception was raised: {}"
                .format(logger_module, str(e)))