Python DBUtils.DBUtils Exemples, dbutils.DBUtils.DBUtils Python Exemples

Exemple #1

0

Afficher le fichier

 def update_collects(self, collect_id, error_list, status, status_only=False):
     """
     Write a new status for one row of the collects table.

     When ``status_only`` is truthy only the ``status`` column is updated.
     Otherwise ``status_message`` is also set, to the JSON encoding of
     ``{'errors': error_list}``.

     :param collect_id: id of the row in the collects table
     :param error_list: errors gathered while processing the collect
     :param status: status value to store
     :param status_only: when truthy, leave status_message untouched
     :return: True if the update ran without raising, False otherwise
     """
     self.__logger.info("Executing update_collects function")
     try:
         if not status_only:
             sql = "update collects set status=%s, status_message=%s where id=%s"
             # The debug line shows the raw error list; the JSON form is only built for the DB call
             self.__logger.debug('Running query: %s' % sql % (status, error_list, collect_id))
             with DBUtils((self.__config['application'])) as connection:
                 connection.execute_nonquery(
                     sql,
                     (status, json.dumps({'errors': error_list}), collect_id))
         else:
             sql = "update collects set status=%s where id=%s"
             self.__logger.debug('Running query: %s' % sql % (status, collect_id))
             with DBUtils((self.__config['application'])) as connection:
                 connection.execute_nonquery(sql, (status, collect_id))
     except Exception as failure:
         # Any failure is logged and reported to the caller as False
         self.__logger.error("Could not update collects table {0}".format(str(failure)))
         return False
     return True

Exemple #2

0

Afficher le fichier

    def __init__(self, shell, entry_point, sc, sqlContext, displayHTML):
        """
        Wrap a DBUtils instance and expose its ``fs`` and ``secrets`` handles.

        The two-argument DBUtils constructor is tried first; if it raises, the
        five-argument form is used instead.

        :param shell: shell object handed to DBUtils
        :param entry_point: entry point object handed to DBUtils
        :param sc: passed to DBUtils only by the five-argument fallback
        :param sqlContext: passed to DBUtils only by the five-argument fallback
        :param displayHTML: passed to DBUtils only by the five-argument fallback
        """
        # ugly, but not possible to differentiate <= 8.2 from >= 8.3
        # NOTE(review): the version numbers presumably refer to the Databricks runtime - confirm
        try:
            self._dbutils = DBUtils(shell, entry_point)
        except Exception:
            # Catch Exception rather than using a bare except so that
            # KeyboardInterrupt / SystemExit are not swallowed by the probe.
            self._dbutils = DBUtils(shell, entry_point, sc, sqlContext,
                                    displayHTML)

        self.fs = self._dbutils.fs
        self.secrets = self._dbutils.secrets
        self.notebook = Notebook()

Exemple #3

0

Afficher le fichier

    def get_global_settings(self, organization_id):
        """
        Look up the global settings of an organization, falling back to defaults.

        The resource_settings table is queried first. When it yields no rows,
        get_config_json is queried with the "DefaultSettings" config name.
        Exceptions raised by the first query are not caught here; exceptions
        raised during the fallback are logged and turned into an error result.

        :param organization_id: String
        :return: (True, settings) on success, else (False, errors) where errors
                 is a dict with "error_msg" and "identifiers" keys
        """
        q_resource_settings = """select settings from resource_settings rs, organizations org where rs.resource_id = org.id
                                        and resource_type = 'ORGANIZATION' and setting_group = 'global_settings'
                                        and org.orgid = %s """

        self.__logger.info("Getting global_settings")

        with DBUtils(self.__config['application']) as connection:
            self.__logger.debug('Connected to database')
            self.__logger.debug("Running Query: %s" % q_resource_settings % (organization_id,))
            result = connection.execute_query(q_resource_settings, (organization_id,))
            self.__logger.debug("Result  received: %s" % result)

        # Organization-specific settings exist: return them straight away
        if result is not None and len(result) != 0:
            settings = result[0].get('settings')
            self.__logger.debug("Result global_settings: {0}".format(str(settings)))
            return True, settings

        # Nothing stored for this organization: fall back to the default settings
        self.__logger.warning("No global_settings set for organization : %s." % organization_id)
        self.__logger.debug("Using default settings")
        default_query = """select * from get_config_json( %s, %s)"""
        try:
            with DBUtils(self.__config['application']) as connection:
                self.__logger.debug('Connected to database')
                self.__logger.debug("Running Query: %s" % default_query % (organization_id, "DefaultSettings"))
                default_settings = connection.execute_query(default_query, (organization_id, "DefaultSettings"))
                self.__logger.debug("Result  received: %s" % default_settings)

                # Sanity check: the fallback query returned nothing either
                if default_settings is None or len(default_settings) == 0:
                    error = "No global settings found"
                    errors = {"error_msg": [error], "identifiers":['']}
                    self.__logger.error(error)
                    return False, errors
            settings = default_settings[0].get('get_config_json')
            self.__logger.debug("Result global_settings: {0}".format(str(settings)))
            return True, settings
        except Exception as failure:
            error = "Could not get default settings"
            errors = {"error_msg": [error], "identifiers":['']}
            self.__logger.error("Could not get default settings. Exception: {0}".format(failure))
            return False, errors

Exemple #4

0

Afficher le fichier

 def test_connect_success(self):
     """Check that a connection opens and reports the expected server version string."""
     expected_version = (
         'PostgreSQL 9.5.4 on x86_64-pc-linux-gnu, '
         'compiled by gcc (GCC) 4.8.2 20140120 '
         '(Red Hat 4.8.2-16), 64-bit'
     )
     with DBUtils(json.loads(DBCONFIG)) as connection:
         rows = connection.execute_query('select version();')
         # The first row is expected to expose the version under the 'version' key
         self.assertEqual(rows[0].get('version'), expected_version)

Exemple #5

0

Afficher le fichier

 def execute_query(self, built_query, parameters=None):
     """
     Run a query through a fresh DBUtils connection and log the executed query.

     :param built_query: query text to execute
     :param parameters: values to bind; when falsy the query is run without parameters
     :return: whatever DBUtils.execute_query returns for the query
     """
     with DBUtils((self.__config['application'])) as connection:
         if parameters:
             rows = connection.execute_query(built_query, parameters)
         else:
             rows = connection.execute_query(built_query)
         self.__logger.debug("Ran query: {0}".format(connection.query))
     return rows

Exemple #6

0

Afficher le fichier

    def get_config_details(self, collect_id):
        """
        Fetch the collect details row for a collect id.

        :param collect_id: collect id that was used in collects table
        :return: the first row of the query result, or None when the result is empty
        """
        self.__logger.debug("Executing function get_config_details for collect_id: {0}".format(collect_id))
        with DBUtils((self.__config['application'])) as connection:
            rows = connection.execute_query(self.__collect_details_query, (collect_id,))
            self.__logger.debug("Ran query: {0}".format(connection.query))

        if rows:
            # Only the first row of the result is used
            first_row = rows[0]
            self.__logger.debug("Result: {0}".format(str(first_row)))
            return first_row

        self.__logger.error("No data found in collects table for collect_id: {0}".format(collect_id))
        self.__logger.info('Exiting script.')
        return None

Exemple #7

0

Afficher le fichier

    def get_default_configurations(self, organization_id, config_name):
        """
        Get configuration against organization for a configuration name

        :param organization_id: Organization id against which configuration is required
        :param config_name: Name of configuration
        :return: (True, first result row) if success, else (False, errors) where
                 errors is a dict with "error_msg" and "identifiers" keys
        """
        with DBUtils((self.__config['application'])) as dbConn:
            result = dbConn.execute_query('select get_config_json as default_config from '
                                          'get_config_json(%s, %s)', (organization_id, config_name))
            self.__logger.debug("Ran query: {0}".format(dbConn.query))
        if not result:
            # Report the config type that was actually requested instead of a
            # hard-coded 'FilterFields', which was wrong for every other config.
            error_template = "No default configuration found in configurations table for config type: '{0}'"
            error_msg = error_template.format(config_name)
            errors = {"error_msg": [error_msg], "identifiers":['']}
            self.__logger.error(error_msg)
            return False, errors
        result_config = result[0]
        self.__logger.debug("Result {0}: {1}".format(config_name, str(result_config)))

        return True, result_config

Exemple #8

0

Afficher le fichier

    def get_import_configuration(self, config_id, file_type):
        """
        Function to get import configuration

        For config_id 0 combined with one of the known file types
        ('Virtual Machine', 'Physical Machine', 'License') the default mapped
        headers are read from get_config_json. In every other case the
        import_configurations table is queried by config_id.

        :param config_id: config id that was used in collects
        :param file_type: Machine, Storage, Physical Machine or Virtual Machine
        :return: (True, first result row) if a configuration is found, else
                 (False, errors) where errors is a dict with "error_msg" and
                 "identifiers" keys
        """
        self.__logger.debug("Executing function get_import_configuration")
        # Bind values travel with the query that needs them. Deciding on the
        # truthiness of config_id alone ran the parameterized query without
        # parameters when config_id was 0/None and file_type was not one of
        # the known defaults.
        params = None
        if config_id == 0 and file_type == 'Virtual Machine':
            query = "select get_config_json->>'vm' as mapped_headers,'t' as is_first_row_header," \
                    " 'virtual_machine' as import_entity_type, 0 as headers_count, 'comma' as " \
                    "column_separator from get_config_json('default','MappedHeaders')"
        elif config_id == 0 and file_type == 'Physical Machine':
            query = "select get_config_json->>'physical'  as mapped_headers,'t' as is_first_row_header," \
                    " 'physical_machine' as import_entity_type, 0 as headers_count, 'comma' as " \
                    "column_separator from get_config_json('default','MappedHeaders')"
        elif config_id == 0 and file_type == 'License':
            query = "select get_config_json->>'license'  as mapped_headers,'t' as is_first_row_header," \
                    " 'license' as import_entity_type, 0 as headers_count, 'comma' as " \
                    "column_separator from get_config_json('default','MappedHeaders')"
        else:
            query = "select is_first_row_header, import_entity_type, mapped_headers, headers_count, " \
                    "column_separator, update_only from import_configurations where config_id=%s"
            params = (config_id,)

        with DBUtils((self.__config['application'])) as dbConn:
            if params is not None:
                import_configuration_result = dbConn.execute_query(query, params)
            else:
                import_configuration_result = dbConn.execute_query(query)
            self.__logger.debug("Ran query: {0}".format(dbConn.query))

        if not import_configuration_result:
            error = "Configuration not found"
            self.__logger.error("Configuration not found for config id: {}".format(config_id))
            errors = {"error_msg": [error], "identifiers":['']}
            return False, errors

        import_configuration = import_configuration_result[0]
        self.__logger.debug("Result import_configuration: {0}".format(str(import_configuration)))

        return True, import_configuration

Exemple #9

0

Afficher le fichier

 def test_connect_incorrect_creds(self):
     """Opening a connection with wrong credentials must raise psycopg2.OperationalError."""
     # Both context managers are entered in order, so the config parsing and the
     # connection attempt happen inside the assertRaises scope.
     with self.assertRaises(psycopg2.OperationalError), \
             DBUtils(json.loads(DBCONFIG_WRONGCRED)):
         pass

Exemple #10

0

Afficher le fichier

Fichier : connect.py Projet : pauldx/jupyterlab-integration

def dbcontext(progressbar=True):
    """Create a databricks context
    The following objects will be created
    - Spark Session
    - Spark Context
    - Spark Hive Context
    - DBUtils (fs module only)

    Connection details are read from the DBJL_PROFILE, DBJL_HOST, DBJL_CLUSTER
    and DBJL_ORG environment variables, and the personal access token is
    prompted for interactively. The created objects are published into the
    IPython user namespace rather than returned.

    Args:
        progressbar (bool, optional): If True the spark progressbar will be installed. Defaults to True.

    Returns:
        str or None: A message string when this is not a remote Databricks
        kernel; None in every other case (context already present, setup
        failure that was printed, or successful setup).
    """
    def get_sparkui_url(host, organisation, clusterId):
        # The organisation query parameter is only added when one is configured
        if organisation is None:
            sparkUi = "%s#/setting/clusters/%s/sparkUi" % (host, clusterId)
        else:
            sparkUi = "%s/?o=%s#/setting/clusters/%s/sparkUi" % (
                host, organisation, clusterId)
        return sparkUi

    def show_status(spark, sparkUi):
        # Render a short HTML summary of the session with a link to the Spark UI.
        # Uses the sparkUi argument instead of rebuilding the same URL from the
        # enclosing scope, and no longer queries the executor infos, whose
        # result was never referenced by the template.
        output = """
        <div>
            <dl>
            <dt>Spark Version</dt><dd>{sc.version}</dd>
            <dt>Spark Application</dt><dd>{sc.appName}</dd>
            <dt>Spark UI</dt><dd><a href="{sparkUi}">go to ...</a></dd>
            </dl>
        </div>
        """.format(
            sc=spark.sparkContext,
            sparkUi=sparkUi,
        )
        display(HTML(output))

    # Get the configuration injected by the client
    #
    profile = os.environ.get("DBJL_PROFILE", None)
    host = os.environ.get("DBJL_HOST", None)
    clusterId = os.environ.get("DBJL_CLUSTER", None)
    organisation = os.environ.get("DBJL_ORG", None)

    sparkUi = get_sparkui_url(host, organisation, clusterId)

    if not is_remote():
        return "This is not a remote Databricks kernel"

    ip = get_ipython()
    spark = ip.user_ns.get("spark")
    if spark is not None:
        # A previous call already set everything up: just show the status again
        print("Spark context already exists")
        load_css()
        show_status(spark, sparkUi)
        return None

    # Create a Databricks virtual python environment and start the py4j gateway
    #
    token = getpass.getpass(
        "Creating a Spark execution context:\nEnter personal access token for profile '%s'"
        % profile)

    try:
        command = Command(url=host, cluster_id=clusterId, token=token)
    except DatabricksApiException as ex:
        print(ex)
        return None

    print("Gateway created for cluster '%s' " % (clusterId),
          end="",
          flush=True)

    # Fetch auth_token and gateway port ...
    #
    cmd = 'c=sc._gateway.client.gateway_client; print(c.gateway_parameters.auth_token + "|" + str(c.port))'
    result = command.execute(cmd)

    # result[0] is treated as a status code (non-zero means failure) and
    # result[1] as the command output / error text
    if result[0] != 0:
        print(result[1])
        return None

    auth_token, port = result[1].split("|")
    port = int(port)

    interpreter = "/databricks/python/bin/python"
    # Ensure that driver and executors use the same python
    #
    os.environ["PYSPARK_PYTHON"] = interpreter
    os.environ["PYSPARK_DRIVER_PYTHON"] = interpreter

    # ... and connect to this gateway
    #
    gateway = get_existing_gateway(port, True, auth_token)
    print(". connected")
    # print("Python interpreter: %s" % interpreter)

    # Retrieve spark session, sqlContext and sparkContext
    #
    conf = SparkConf(_jconf=gateway.entry_point.getSparkConf())
    sqlContext = RemoteContext(gateway=gateway, conf=conf)

    sqlContext = HiveContext(sqlContext, gateway.entry_point.getSQLContext())
    spark = sqlContext.sparkSession
    sc = spark.sparkContext

    # Enable pretty printing of dataframes
    #
    spark.conf.set("spark.sql.repl.eagerEval.enabled", "true")

    # Define a separate pool for the fair scheduler
    # Todo: Find a better way to store pool_id instead of this hack
    #
    job_info = JobInfo(str(random.getrandbits(64)))

    # Patch the remote spark UI into the _repr_html_ call
    #
    def repr_html(uiWebUrl):
        def sc_repr_html():
            return """
            <div>
                <p><b>SparkContext</b></p>
                <p><a href="{uiWebUrl}">Spark UI</a></p>
                <dl>
                  <dt>Version</dt><dd><code>v{sc.version}</code></dd>
                  <dt>AppName</dt><dd><code>{sc.appName}</code></dd>
                  <dt>Master</dt><dd><code>{sc.master}</code></dd>
                </dl>
            </div>
            """.format(sc=spark.sparkContext, uiWebUrl=uiWebUrl)

        return sc_repr_html

    sc_repr_html = repr_html(sparkUi)
    sc._repr_html_ = sc_repr_html

    # Monkey patch Databricks Cli to allow mlflow tracking with the credentials provided
    # by this routine
    # Only necessary when mlflow is installed
    #
    try:
        from databricks_cli.configure.provider import ProfileConfigProvider, DatabricksConfig

        def get_config(self):
            config = DatabricksConfig(host, None, None, token, False)
            if config.is_valid:
                return config
            return None

        ProfileConfigProvider.get_config = get_config
    except Exception:
        # Best effort: the patch is optional, so failures (typically the
        # package not being installed) are ignored. Exception is caught rather
        # than using a bare except so Ctrl-C / SystemExit still propagate.
        pass

    # Initialize the ipython shell with spark context
    #
    shell = get_ipython()
    shell.sc = sc
    shell.sqlContext = sqlContext
    shell.displayHTML = lambda html: display(HTML(html))

    # Retrieve the py4j gateway entrypoint
    #
    entry_point = spark.sparkContext._gateway.entry_point

    # Initialize dbutils
    #
    dbutils = DBUtils(shell, entry_point)

    # Setting up Spark progress bar
    #
    if progressbar:
        # print("Set up Spark progress bar")
        load_progressbar(ip, sc, job_info)
        load_css()

    # Register sql magic
    #
    ip.register_magic_function(sql, magic_kind="line_cell")

    # Ensure that the virtual python environment and py4j gateway gets shut down
    # when the python interpreter shuts down
    #
    def shutdown_kernel(command):
        def handler():
            from IPython import get_ipython

            ip = get_ipython()
            # Drop the objects this routine published into the user namespace
            for name in ("spark", "sc", "sqlContext", "dbutils"):
                if ip.user_ns.get(name, None) is not None:
                    del ip.user_ns[name]
            # Context is a singleton
            command.close()

        return handler

    atexit.register(shutdown_kernel(command))

    # Forward spark variables to the user namespace
    #
    ip.user_ns["spark"] = spark
    ip.user_ns["sc"] = sc
    ip.user_ns["sqlContext"] = sqlContext
    ip.user_ns["dbutils"] = dbutils
    ip.user_ns["dbbrowser"] = DatabricksBrowser(spark, dbutils)

    print("The following global variables have been created:")
    print("- spark       Spark session")
    print("- sc          Spark context")
    print("- sqlContext  Hive Context")
    print("- dbutils     Databricks utilities (filesystem access only)")
    print("- dbbrowser   Allows to browse dbfs and databases:")
    print("              - dbbrowser.dbfs()")
    print("              - dbbrowser.databases()\n")

    show_status(spark, sparkUi)
    return None

Exemple #11

0

Afficher le fichier

Fichier : main.py Projet : Keda87/odoa-telegram-bot

import logging
import os

from dotenv import load_dotenv, find_dotenv
from odoa import ODOA
from telegram.ext import Updater, CommandHandler

from dbutils import DBUtils

# Module-level setup: everything below runs once, at import time.

# Shared ODOA client; get_surah() uses it to pick a random surah.
odoa = ODOA()
# Load variables from the .env file located by find_dotenv (python-dotenv).
load_dotenv(find_dotenv())
# Log at INFO level with timestamp, logger name and level in every record.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)
# Database helper; the database name is taken from the DB environment variable.
db = DBUtils(db_name=os.getenv('DB'))


def get_surah():
    """Return a random surah as text: description, ayah and translation separated by blank lines."""
    picked = odoa.get_random_surah()
    # The ayah is decoded from UTF-8 before it is embedded in the message
    verse = picked.ayah.decode('utf8')
    return f'{picked.desc}\n\n{verse}\n\n{picked.translate}'


def start_handler(bot, update):
    username = update.message.from_user.username
    message = (f'Hi {username},\n\n'
               f'SaHaDa akan mengirimkan 2 surat beserta terjemahan setiap '

Exemple #12

0

Afficher le fichier

Fichier : connect.py Projet : jinlmsft/jupyterlab-integration

 def __init__(self, shell, entry_point):
     """
     Wrap a DBUtils instance built from the given shell and entry point.

     The wrapped object's ``fs`` and ``secrets`` attributes are re-exported on
     this instance, and a fresh Notebook object is attached as ``notebook``.
     """
     wrapped = DBUtils(shell, entry_point)
     self._dbutils = wrapped
     self.fs = wrapped.fs
     self.secrets = wrapped.secrets
     self.notebook = Notebook()