Example #1
    def __getattr__(self, attribute_name):
        if self._db_utils is None:
            if not is_databricks():
                # pylint: disable=import-outside-toplevel
                from databricksbundle.spark.java_home_setter import set_java_home

                # pylint: disable=import-outside-toplevel
                from databricksbundle.spark.existing_spark_cleanup import (
                    clean_existing_spark_config,
                )

                set_java_home()
                clean_existing_spark_config()

                if platform.system() == "Windows":
                    # pylint: disable=import-outside-toplevel
                    from databricksbundle.spark.hadoop_home_setter import (
                        set_hadoop_home_env_var,
                    )

                    set_hadoop_home_env_var()

            self._db_utils = self._factory_callback()

        if not hasattr(self._db_utils, attribute_name):
            raise AttributeError(attribute_name)

        attr = getattr(self._db_utils, attribute_name)

        # Non-callable attributes are returned directly; plain functions are
        # wrapped so the call is delegated to the lazily created dbutils.
        if not isinstance(attr, types.FunctionType):
            return attr

        def wrapper(*args, **kwargs):
            return attr(*args, **kwargs)

        return wrapper
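
A note on how the proxy above is wired: __getattr__ relies on two attributes the snippet does not show, a _db_utils slot initialized to None and a _factory_callback that builds the real dbutils object. A minimal constructor for such a lazy proxy could look like the sketch below; the class name DBUtilsLazy and the factory passed to it are illustrative assumptions, not part of the example.

class DBUtilsLazy:
    def __init__(self, factory_callback):
        # The real dbutils instance is created on first attribute access,
        # not when the proxy itself is constructed.
        self._db_utils = None
        self._factory_callback = factory_callback
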
Example #2
    def boot(self, container: ContainerInterface):
        parameters = container.get_parameters()

        if (
            is_databricks()
            and is_notebook_environment()
            and parameters.databricksbundle.enable_notebook_error_handler is True
            and not re.match("^/Users/", get_notebook_path())
        ):
            logger = container.get("databricksbundle.logger")

            set_notebook_error_handler(logger)

            multiple_results_enabled = "spark.databricks.workspace.multipleResults.enabled"

            spark = container.get(SparkSession)

            if is_databricks_repo():
                # pylint: disable=import-outside-toplevel
                import IPython

                link_generator = GithubLinkGenerator()
                IPython.get_ipython().user_ns["daipe_help"] = (
                    link_generator.generate_link_from_module
                )

            if spark.conf.get(multiple_results_enabled) == "false":
                logger.warning(f"{multiple_results_enabled} is set to false!")
                logger.warning("Error messages will not show properly!")
Example #3
    @staticmethod
    def autodetect():
        if is_databricks():
            if is_notebook_environment():
                return DatabricksBundle(DatabricksBundle.DATABRICKS_NOTEBOOK)

            return DatabricksBundle(DatabricksBundle.DATABRICKS_SCRIPT)

        return DatabricksBundle(DatabricksBundle.DATABRICKS_CONNECT)
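
A brief usage sketch for the factory above; the call site is illustrative and assumes DatabricksBundle and its mode constants are importable as shown in the snippet.

bundle = DatabricksBundle.autodetect()
# On a Databricks cluster this yields the notebook or script variant;
# anywhere else it falls back to the Databricks Connect variant.
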
Example #4
    def modify_parameters(self, parameters: Box) -> Box:
        if parameters.daipecore.logger.type == "default":
            parameters.daipecore.logger.type = "databricks"

        if is_databricks():
            parameters.pysparkbundle.dataframe.show_method = "databricks_display"
            parameters.daipecore.pandas.dataframe.show_method = "databricks_display"

        if parameters.pysparkbundle.filesystem is not None:
            raise Exception(
                "pysparkbundle.filesystem parameter must not be explicitly set as dbutils.fs must be used for Databricks-based projects"
            )

        parameters.pysparkbundle.filesystem = "dbutils.fs"

        return parameters
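
To exercise these overrides locally, the method can be driven with a python-box Box carrying the keys it touches. The nested structure and the placeholder values below are inferred only from the attribute paths used above, and bundle is assumed to be a DatabricksBundle instance such as the one returned by autodetect in Example #3.

from box import Box

parameters = Box(
    {
        "daipecore": {
            "logger": {"type": "default"},
            "pandas": {"dataframe": {"show_method": "pandas_display"}},
        },
        "pysparkbundle": {
            "dataframe": {"show_method": "spark_show"},
            # Must stay unset by the user; the bundle forces dbutils.fs itself.
            "filesystem": None,
        },
    }
)

parameters = bundle.modify_parameters(parameters)
assert parameters.daipecore.logger.type == "databricks"
assert parameters.pysparkbundle.filesystem == "dbutils.fs"
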
Example #5
    def __getattr__(self, attribute_name):
        if self._spark_session is None:
            if not is_databricks():
                # pylint: disable=import-outside-toplevel
                from databricksbundle.spark.java_home_setter import set_java_home

                # pylint: disable=import-outside-toplevel
                from databricksbundle.spark.existing_spark_cleanup import (
                    clean_existing_spark_config,
                )

                set_java_home()
                clean_existing_spark_config()

                if platform.system() == "Windows":
                    # pylint: disable=import-outside-toplevel
                    from databricksbundle.spark.hadoop_home_setter import (
                        set_hadoop_home_env_var,
                    )

                    set_hadoop_home_env_var()

            self._spark_session = self._factory_callback()

        return getattr(self._spark_session, attribute_name)

    def should_be_resolved(self):
        return is_databricks()