def update_collects(self, collect_id, error_list, status, status_only=False):
    """
    Write the processing status of one row of the collects table.

    With ``status_only`` set, only the ``status`` column is updated.
    Otherwise ``status_message`` is also written, as the JSON document
    ``{"errors": error_list}``.

    :param collect_id: id of the row in the collects table
    :param error_list: errors collected while processing the collect
    :param status: status value to store
    :param status_only: when True, status_message is left untouched
    :return: True when the statement ran without raising, False when any
             exception was caught (the exception text is logged)
    """
    self.__logger.info("Executing update_collects function")
    try:
        if status_only:
            update_query = "update collects set status=%s where id=%s"
            logged_values = (status, collect_id)
        else:
            update_query = "update collects set status=%s, status_message=%s where id=%s"
            logged_values = (status, error_list, collect_id)

        # The debug line shows the raw error list; it is JSON-encoded only
        # for the real statement below.
        self.__logger.debug('Running query: %s' % update_query % logged_values)

        with DBUtils((self.__config['application'])) as dbConn:
            if status_only:
                bound_values = (status, collect_id)
            else:
                bound_values = (status, json.dumps({'errors': error_list}), collect_id)
            dbConn.execute_nonquery(update_query, bound_values)
        return True
    except Exception as ex:
        self.__logger.error("Could not update collects table {0}".format(str(ex)))
        return False
def __init__(self, shell, entry_point, sc, sqlContext, displayHTML):
    """
    Wrap a ``DBUtils`` instance and expose its ``fs`` and ``secrets`` members.

    The two-argument ``DBUtils`` constructor is tried first. If it raises,
    the five-argument form is used instead; per the original comment the
    two signatures belong to runtimes <= 8.2 and >= 8.3, which cannot be
    told apart up front.

    :param shell: passed through to ``DBUtils``
    :param entry_point: passed through to ``DBUtils``
    :param sc: only used by the five-argument constructor
    :param sqlContext: only used by the five-argument constructor
    :param displayHTML: only used by the five-argument constructor
    """
    # ugly, but not possible to differentiate <= 8.2 from >= 8.3
    try:
        self._dbutils = DBUtils(shell, entry_point)
    except Exception:
        # Deliberately not a bare ``except``: KeyboardInterrupt and
        # SystemExit must propagate instead of triggering the fallback.
        self._dbutils = DBUtils(shell, entry_point, sc, sqlContext, displayHTML)
    self.fs = self._dbutils.fs
    self.secrets = self._dbutils.secrets
    self.notebook = Notebook()
def get_global_settings(self, organization_id):
    """
    Return the global settings of an organization, falling back to defaults.

    The organization's ``global_settings`` row in resource_settings is looked
    up first. When that query yields no rows, ``get_config_json`` is queried
    with the configuration name "DefaultSettings" instead.

    Exceptions raised by the first lookup are not caught here; only the
    fallback lookup is wrapped in a try/except.

    :param organization_id: String
    :return: (True, settings) on success, otherwise (False, errors) where
             errors is a dict holding "error_msg" and "identifiers" lists
    """
    q_resource_settings = (
        "select settings from resource_settings rs, organizations org "
        "where rs.resource_id = org.id and resource_type = 'ORGANIZATION' "
        "and setting_group = 'global_settings' and org.orgid = %s "
    )
    self.__logger.info("Getting global_settings")
    with DBUtils(self.__config['application']) as db_conn:
        self.__logger.debug('Connected to database')
        self.__logger.debug("Running Query: %s" % q_resource_settings % (organization_id,))
        result = db_conn.execute_query(q_resource_settings, (organization_id,))
        self.__logger.debug("Result received: %s" % result)

    # Organization-specific settings take precedence when present
    if result is not None and len(result) != 0:
        settings = result[0].get('settings')
        self.__logger.debug("Result global_settings: {0}".format(str(settings)))
        return True, settings

    self.__logger.warning("No global_settings set for organization : %s." % organization_id)
    self.__logger.debug("Using default settings")
    default_query = "select * from get_config_json( %s, %s)"
    try:
        with DBUtils(self.__config['application']) as db_conn:
            self.__logger.debug('Connected to database')
            self.__logger.debug("Running Query: %s" % default_query % (organization_id, "DefaultSettings"))
            default_settings = db_conn.execute_query(default_query, (organization_id, "DefaultSettings"))
            self.__logger.debug("Result received: %s" % default_settings)

        # Sanity check: the fallback may come back empty as well
        if default_settings is None or len(default_settings) == 0:
            error = "No global settings found"
            errors = {"error_msg": [error], "identifiers": ['']}
            self.__logger.error(error)
            return False, errors

        settings = default_settings[0].get('get_config_json')
        self.__logger.debug("Result global_settings: {0}".format(str(settings)))
        return True, settings
    except Exception as ex:
        error = "Could not get default settings"
        errors = {"error_msg": [error], "identifiers": ['']}
        self.__logger.error("Could not get default settings. Exception: {0}".format(ex))
        return False, errors
def test_connect_success(self):
    """A valid configuration connects and reports the expected server version."""
    expected_version = (
        'PostgreSQL 9.5.4 on x86_64-pc-linux-gnu, '
        'compiled by gcc (GCC) 4.8.2 20140120 '
        '(Red Hat 4.8.2-16), 64-bit'
    )
    with DBUtils(json.loads(DBCONFIG)) as dbobj:
        rows = dbobj.execute_query('select version();')
        reported_version = rows[0].get('version')
        self.assertEqual(reported_version, expected_version)
def execute_query(self, built_query, parameters=None):
    """
    Run a query on a fresh connection and return whatever it yields.

    :param built_query: query text to execute
    :param parameters: bind values; when falsy the query is executed
                       without any parameters argument
    :return: the value returned by ``DBUtils.execute_query``
    """
    with DBUtils((self.__config['application'])) as dbConn:
        # Only forward ``parameters`` when there is something to bind
        call_args = (built_query, parameters) if parameters else (built_query,)
        result = dbConn.execute_query(*call_args)
        self.__logger.debug("Ran query: {0}".format(dbConn.query))
    return result
def get_config_details(self, collect_id):
    """
    Fetch the collects row for ``collect_id``.

    :param collect_id: collect id that was used in collects table
    :return: the first row returned by the collect-details query, or None
             when the query returns nothing
    """
    self.__logger.debug("Executing function get_config_details for collect_id: {0}".format(collect_id))
    with DBUtils((self.__config['application'])) as dbConn:
        rows = dbConn.execute_query(self.__collect_details_query, (collect_id,))
        self.__logger.debug("Ran query: {0}".format(dbConn.query))

    if rows:
        # The result is a list of rows; only the first one is used
        first_row = rows[0]
        self.__logger.debug("Result: {0}".format(str(first_row)))
        return first_row

    self.__logger.error("No data found in collects table for collect_id: {0}".format(collect_id))
    self.__logger.info('Exiting script.')
    return None
def get_default_configurations(self, organization_id, config_name):
    """
    Get the configuration stored for an organization under a given name.

    The configuration is read through ``get_config_json`` and selected
    under the column alias ``default_config``.

    :param organization_id: Organization id against which configuration is required
    :param config_name: Name of configuration
    :return: (True, first result row) on success, otherwise (False, errors)
             where errors is a dict holding "error_msg" and "identifiers" lists
    """
    with DBUtils((self.__config['application'])) as dbConn:
        result = dbConn.execute_query('select get_config_json as default_config from '
                                      'get_config_json(%s, %s)', (organization_id, config_name))
        self.__logger.debug("Ran query: {0}".format(dbConn.query))

    if not result:
        # Report the configuration that was actually requested rather than
        # a hard-coded name.
        error_msg = ("No default configuration found in configurations table "
                     "for config type: '{0}'".format(config_name))
        errors = {"error_msg": [error_msg], "identifiers": ['']}
        self.__logger.error(error_msg)
        return False, errors

    result_config = result[0]
    self.__logger.debug("Result {0}: {1}".format(config_name, str(result_config)))
    return True, result_config
def get_import_configuration(self, config_id, file_type):
    """
    Load the import configuration for a collect.

    When ``config_id`` is 0 and ``file_type`` is 'Virtual Machine',
    'Physical Machine' or 'License', a default configuration is built from
    the 'MappedHeaders' entry of ``get_config_json``. In every other case
    the row of import_configurations matching ``config_id`` is read.

    :param config_id: config id that was used in collects
    :param file_type: Machine, Storage, Physical Machine or Virtual Machine
    :return: (True, first result row) on success, otherwise (False, errors)
             where errors is a dict holding "error_msg" and "identifiers" lists
    """
    self.__logger.debug("Executing function get_import_configuration")

    # file_type -> (key inside the MappedHeaders JSON, import_entity_type value)
    default_mappings = {
        'Virtual Machine': ('vm', 'virtual_machine'),
        'Physical Machine': ('physical', 'physical_machine'),
        'License': ('license', 'license'),
    }

    if config_id == 0 and file_type in default_mappings:
        json_key, entity_type = default_mappings[file_type]
        # Both substituted values come from the constant table above
        query = ("select get_config_json->>'{0}' as mapped_headers,'t' as is_first_row_header,"
                 " '{1}' as import_entity_type, 0 as headers_count, 'comma' as "
                 "column_separator from get_config_json('default','MappedHeaders')"
                 ).format(json_key, entity_type)
        parameters = None
    else:
        query = "select is_first_row_header, import_entity_type, mapped_headers, headers_count, " \
                "column_separator, update_only from import_configurations where config_id=%s"
        # Always bind config_id for this query. Choosing by the truthiness of
        # config_id would send the "%s" placeholder unbound when config_id is
        # 0/None and file_type is not one of the default types.
        parameters = (config_id,)

    with DBUtils((self.__config['application'])) as dbConn:
        if parameters is None:
            import_configuration_result = dbConn.execute_query(query)
        else:
            import_configuration_result = dbConn.execute_query(query, parameters)
        self.__logger.debug("Ran query: {0}".format(dbConn.query))

    if not import_configuration_result:
        error = "Configuration not found"
        self.__logger.error("Configuration not found for config id: {}".format(config_id))
        errors = {"error_msg": [error], "identifiers": ['']}
        return False, errors

    import_configuration = import_configuration_result[0]
    self.__logger.debug("Result import_configuration: {0}".format(str(import_configuration)))
    return True, import_configuration
def test_connect_incorrect_creds(self):
    """Opening a connection with wrong credentials raises OperationalError."""
    expected_error = psycopg2.OperationalError
    with self.assertRaises(expected_error):
        bad_config = json.loads(DBCONFIG_WRONGCRED)
        # Entering the context is all that is needed; the body is a no-op
        with DBUtils(bad_config):
            pass
def dbcontext(progressbar=True):
    """Create a databricks context

    The following objects will be created and published in the IPython
    user namespace:

    - Spark Session
    - Spark Context
    - Spark Hive Context
    - DBUtils (fs module only)

    Connection details are read from the environment variables
    DBJL_PROFILE, DBJL_HOST, DBJL_CLUSTER and DBJL_ORG; the personal access
    token is prompted for interactively.

    Args:
        progressbar (bool, optional): If True the spark progressbar will be
            installed. Defaults to True.

    Returns:
        A message string when not running in a remote Databricks kernel,
        otherwise None (also when the context already exists or when the
        gateway could not be created).
    """

    def get_sparkui_url(host, organisation, clusterId):
        # The organisation id is only part of the URL when one is configured
        if organisation is None:
            return "%s#/setting/clusters/%s/sparkUi" % (host, clusterId)
        return "%s/?o=%s#/setting/clusters/%s/sparkUi" % (host, organisation, clusterId)

    def show_status(spark, sparkUi):
        # Render a short HTML summary of the Spark context with a link to the Spark UI
        output = (
            ' <div> <dl> '
            '<dt>Spark Version</dt><dd>{sc.version}</dd> '
            '<dt>Spark Application</dt><dd>{sc.appName}</dd> '
            '<dt>Spark UI</dt><dd><a href="{sparkUi}">go to ...</a></dd> '
            '</dl> </div> '
        ).format(sc=spark.sparkContext, sparkUi=sparkUi)
        display(HTML(output))

    # Get the configuration injected by the client
    profile = os.environ.get("DBJL_PROFILE", None)
    host = os.environ.get("DBJL_HOST", None)
    clusterId = os.environ.get("DBJL_CLUSTER", None)
    organisation = os.environ.get("DBJL_ORG", None)

    sparkUi = get_sparkui_url(host, organisation, clusterId)

    if not is_remote():
        return "This is not a remote Databricks kernel"

    ip = get_ipython()

    spark = ip.user_ns.get("spark")
    if spark is not None:
        print("Spark context already exists")
        load_css()
        show_status(spark, sparkUi)
        return None

    # Create a Databricks virtual python environment and start the py4j gateway
    token = getpass.getpass(
        "Creating a Spark execution context:\nEnter personal access token for profile '%s'" % profile)

    try:
        command = Command(url=host, cluster_id=clusterId, token=token)
    except DatabricksApiException as ex:
        print(ex)
        return None

    print("Gateway created for cluster '%s' " % (clusterId), end="", flush=True)

    # Fetch auth_token and gateway port ...
    cmd = 'c=sc._gateway.client.gateway_client; print(c.gateway_parameters.auth_token + "|" + str(c.port))'
    result = command.execute(cmd)

    if result[0] != 0:
        print(result[1])
        return None

    # The port is the last "|"-separated field; splitting from the right
    # keeps a token that itself contains "|" intact.
    auth_token, port = result[1].rsplit("|", 1)
    port = int(port)

    interpreter = "/databricks/python/bin/python"
    # Ensure that driver and executors use the same python
    os.environ["PYSPARK_PYTHON"] = interpreter
    os.environ["PYSPARK_DRIVER_PYTHON"] = interpreter

    # ... and connect to this gateway
    gateway = get_existing_gateway(port, True, auth_token)
    print(". connected")

    # Retrieve spark session, sqlContext and sparkContext
    conf = SparkConf(_jconf=gateway.entry_point.getSparkConf())
    sqlContext = RemoteContext(gateway=gateway, conf=conf)
    sqlContext = HiveContext(sqlContext, gateway.entry_point.getSQLContext())
    spark = sqlContext.sparkSession
    sc = spark.sparkContext

    # Enable pretty printing of dataframes
    spark.conf.set("spark.sql.repl.eagerEval.enabled", "true")

    # Define a separate pool for the fair scheduler
    # Todo: Find a better way to store pool_id instead of this hack
    job_info = JobInfo(str(random.getrandbits(64)))

    # Patch the remote spark UI into the _repr_html_ call
    def repr_html(uiWebUrl):
        def sc_repr_html():
            return (
                ' <div> <p><b>SparkContext</b></p> '
                '<p><a href="{uiWebUrl}">Spark UI</a></p> '
                '<dl> '
                '<dt>Version</dt><dd><code>v{sc.version}</code></dd> '
                '<dt>AppName</dt><dd><code>{sc.appName}</code></dd> '
                '<dt>Master</dt><dd><code>{sc.master}</code></dd> '
                '</dl> </div> '
            ).format(sc=spark.sparkContext, uiWebUrl=uiWebUrl)

        return sc_repr_html

    sc_repr_html = repr_html(sparkUi)
    sc._repr_html_ = sc_repr_html

    # Monkey patch Databricks Cli to allow mlflow tracking with the credentials provided
    # by this routine
    # Only necessary when mlflow is installed
    try:
        from databricks_cli.configure.provider import ProfileConfigProvider, DatabricksConfig

        def get_config(self):
            config = DatabricksConfig(host, None, None, token, False)
            if config.is_valid:
                return config
            return None

        ProfileConfigProvider.get_config = get_config
    except Exception:
        # Best effort: the patch is optional. ``Exception`` (not a bare
        # ``except``) lets KeyboardInterrupt/SystemExit propagate.
        pass

    # Initialize the ipython shell with spark context
    shell = get_ipython()
    shell.sc = sc
    shell.sqlContext = sqlContext
    shell.displayHTML = lambda html: display(HTML(html))

    # Retrieve the py4j gateway entrypoint
    entry_point = spark.sparkContext._gateway.entry_point

    # Initialize dbutils
    dbutils = DBUtils(shell, entry_point)

    # Setting up Spark progress bar
    if progressbar:
        load_progressbar(ip, sc, job_info)
    # NOTE(review): the original indentation is lost; load_css() is kept
    # unconditional here, as in the early-return path above -- confirm.
    load_css()

    # Register sql magic
    ip.register_magic_function(sql, magic_kind="line_cell")

    # Ensure that the virtual python environment and py4j gateway gets shut down
    # when the python interpreter shuts down
    def shutdown_kernel(command):
        def handler():
            from IPython import get_ipython

            ip = get_ipython()
            # Drop every name this routine published, if it is still set
            for name in ("spark", "sc", "sqlContext", "dbutils"):
                if ip.user_ns.get(name, None) is not None:
                    del ip.user_ns[name]
            # Context is a singleton
            command.close()

        return handler

    atexit.register(shutdown_kernel(command))

    # Forward spark variables to the user namespace
    ip.user_ns["spark"] = spark
    ip.user_ns["sc"] = sc
    ip.user_ns["sqlContext"] = sqlContext
    ip.user_ns["dbutils"] = dbutils
    ip.user_ns["dbbrowser"] = DatabricksBrowser(spark, dbutils)

    print("The following global variables have been created:")
    print("- spark Spark session")
    print("- sc Spark context")
    print("- sqlContext Hive Context")
    print("- dbutils Databricks utilities (filesystem access only)")
    print("- dbbrowser Allows to browse dbfs and databases:")
    print(" - dbbrowser.dbfs()")
    print(" - dbbrowser.databases()\n")

    show_status(spark, sparkUi)
    return None
import os from dotenv import load_dotenv, find_dotenv from odoa import ODOA from telegram.ext import Updater, CommandHandler from dbutils import DBUtils odoa = ODOA() load_dotenv(find_dotenv()) logging.basicConfig( format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.INFO, ) logger = logging.getLogger(__name__) db = DBUtils(db_name=os.getenv('DB')) def get_surah(): surah = odoa.get_random_surah() ayah = surah.ayah.decode('utf8') description = surah.desc translate = surah.translate message = f'{description}\n\n{ayah}\n\n{translate}' return message def start_handler(bot, update): username = update.message.from_user.username message = (f'Hi {username},\n\n' f'SaHaDa akan mengirimkan 2 surat beserta terjemahan setiap '
def __init__(self, shell, entry_point):
    """
    Wrap a ``DBUtils`` instance and expose its ``fs`` and ``secrets`` members.

    :param shell: passed through to ``DBUtils``
    :param entry_point: passed through to ``DBUtils``
    """
    wrapped = DBUtils(shell, entry_point)
    self._dbutils = wrapped
    # Re-export the two handles of the wrapped object directly on this instance
    self.fs = wrapped.fs
    self.secrets = wrapped.secrets
    self.notebook = Notebook()