def __getattr__(self, attribute_name):
    if self._db_utils is None:
        if not is_databricks():
            # pylint: disable=import-outside-toplevel
            from databricksbundle.spark.java_home_setter import set_java_home

            # pylint: disable=import-outside-toplevel
            from databricksbundle.spark.existing_spark_cleanup import (
                clean_existing_spark_config,
            )

            set_java_home()
            clean_existing_spark_config()

            if platform.system() == "Windows":
                # pylint: disable=import-outside-toplevel
                from databricksbundle.spark.hadoop_home_setter import (
                    set_hadoop_home_env_var,
                )

                set_hadoop_home_env_var()

        self._db_utils = self._factory_callback()

    if hasattr(self._db_utils, attribute_name) is False:
        raise AttributeError(attribute_name)

    attr = getattr(self._db_utils, attribute_name)

    if isinstance(attr, types.FunctionType) is False:
        return attr

    def wrapper(*args, **kw):
        return attr(*args, **kw)

    return wrapper
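# A minimal usage sketch of the lazy proxy above, assuming the method belongs to a
# wrapper class constructed with a factory callback (the class name DbUtilsWrapper
# and the factory function below are illustrative assumptions, not taken from this
# excerpt). The point is that dbutils is only created on first attribute access,
# and the local JAVA_HOME/HADOOP_HOME setup only runs outside Databricks.
#
# def _create_db_utils():
#     from pyspark.dbutils import DBUtils
#     from pyspark.sql import SparkSession
#
#     return DBUtils(SparkSession.builder.getOrCreate())
#
# db_utils = DbUtilsWrapper(_create_db_utils)  # nothing initialised yet
# files = db_utils.fs.ls("/")                  # first access triggers the factory callback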
def boot(self, container: ContainerInterface):
    parameters = container.get_parameters()

    if (
        is_databricks()
        and is_notebook_environment()
        and parameters.databricksbundle.enable_notebook_error_handler is True
        and not re.match("^/Users/", get_notebook_path())
    ):
        logger = container.get("databricksbundle.logger")
        set_notebook_error_handler(logger)

        multiple_results_enabled = "spark.databricks.workspace.multipleResults.enabled"
        spark = container.get(SparkSession)

        if is_databricks_repo():
            # pylint: disable=import-outside-toplevel
            import IPython

            link_generator = GithubLinkGenerator()
            IPython.get_ipython().user_ns["daipe_help"] = link_generator.generate_link_from_module

        if spark.conf.get(multiple_results_enabled) == "false":
            logger.warning(f"{multiple_results_enabled} is set to false!")
            logger.warning("Error messages will not show properly!")
def autodetect():
    if is_databricks():
        if is_notebook_environment():
            return DatabricksBundle(DatabricksBundle.DATABRICKS_NOTEBOOK)

        return DatabricksBundle(DatabricksBundle.DATABRICKS_SCRIPT)

    return DatabricksBundle(DatabricksBundle.DATABRICKS_CONNECT)
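# A hedged sketch of how autodetect() might be registered with an application
# kernel; the MyKernel class and _register_bundles() hook below are assumptions
# for illustration only, not copied from this project. autodetect() picks the
# notebook, script, or databricks-connect variant at runtime, so the same
# registration works in all three environments.
#
# class MyKernel(Kernel):
#     def _register_bundles(self):
#         return [
#             DatabricksBundle.autodetect(),
#         ]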
def modify_parameters(self, parameters: Box) -> Box:
    if parameters.daipecore.logger.type == "default":
        parameters.daipecore.logger.type = "databricks"

    if is_databricks():
        parameters.pysparkbundle.dataframe.show_method = "databricks_display"
        parameters.daipecore.pandas.dataframe.show_method = "databricks_display"

    if parameters.pysparkbundle.filesystem is not None:
        raise Exception(
            "pysparkbundle.filesystem parameter must not be explicitly set as dbutils.fs must be used for Databricks-based projects"
        )

    parameters.pysparkbundle.filesystem = "dbutils.fs"

    return parameters
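# A self-contained sketch of the overrides applied by modify_parameters(), using
# python-box directly; the parameter tree below is a trimmed stand-in for the real
# configuration, not the project's actual defaults.
#
# from box import Box
#
# parameters = Box(
#     {
#         "daipecore": {"logger": {"type": "default"}, "pandas": {"dataframe": {"show_method": "pandas_display"}}},
#         "pysparkbundle": {"dataframe": {"show_method": "pyspark_show"}, "filesystem": None},
#     }
# )
#
# bundle = DatabricksBundle.autodetect()
# parameters = bundle.modify_parameters(parameters)
#
# assert parameters.daipecore.logger.type == "databricks"   # "default" gets switched to the Databricks logger
# assert parameters.pysparkbundle.filesystem == "dbutils.fs"  # filesystem is always forced to dbutils.fs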
def __getattr__(self, attribute_name):
    if self._spark_session is None:
        if not is_databricks():
            # pylint: disable=import-outside-toplevel
            from databricksbundle.spark.java_home_setter import set_java_home

            # pylint: disable=import-outside-toplevel
            from databricksbundle.spark.existing_spark_cleanup import (
                clean_existing_spark_config,
            )

            set_java_home()
            clean_existing_spark_config()

            if platform.system() == "Windows":
                # pylint: disable=import-outside-toplevel
                from databricksbundle.spark.hadoop_home_setter import (
                    set_hadoop_home_env_var,
                )

                set_hadoop_home_env_var()

        self._spark_session = self._factory_callback()

    return getattr(self._spark_session, attribute_name)
def should_be_resolved(self):
    return is_databricks()