def run(self): logger = "restServer" log = logging.getLogger(logger) log.info("REST Server started") self.mysql_conn = None self.mysql_cursor = None self.configDBSession = None self.configDBEngine = None self.debugLogLevel = False if logging.root.level == 10: # DEBUG self.debugLogLevel = True restAddress = configuration.get("Server", "restServer_address") restPort = configuration.get("Server", "restServer_port") if restAddress.strip() != "" and restPort.strip() != "": app = Flask("restServer") api = Api(app) api.add_resource(restRoot, '/') api.add_resource(restStatus, '/status') api.add_resource(restJdbcConnections, '/jdbc_connections') log.info("Starting RESTserver on %s:%s"%(restAddress, restPort)) serve( TransLogger(app, setup_console_handler=False, logger=logging.getLogger("restServerAccess")), host=restAddress, port=restPort, ident="DBImport REST Server", url_scheme='https', _quiet=True)
def __init__(self, Hive_DB=None, Hive_Table=None):
    logging.debug("Executing common_operations.__init__()")

    self.Hive_DB = Hive_DB
    self.Hive_Table = Hive_Table
    # self.mysql_conn = None
    # self.mysql_cursor = None
    self.hive_conn = None
    self.hive_cursor = None
    self.debugLogLevel = False

    if logging.root.level == 10:        # DEBUG
        self.debugLogLevel = True

    # Fetch and initialize the Kerberos configuration
    self.kerberosPrincipal = configuration.get("Kerberos", "principal")
    self.webHCatAuth = HTTPKerberosAuth(force_preemptive=True, principal=self.kerberosPrincipal)

    self.common_config = common_config.config()

    # Fetch configuration details about Hive LLAP
    # self.hive_hostname = configuration.get("Hive", "hostname")
    # self.hive_port = configuration.get("Hive", "port")
    self.hive_servers = configuration.get("Hive", "servers")
    self.hive_kerberos_service_name = configuration.get("Hive", "kerberos_service_name")
    self.hive_kerberos_realm = configuration.get("Hive", "kerberos_realm")
    self.hive_print_messages = self.common_config.getConfigValue(key="hive_print_messages")

    if configuration.get("Hive", "use_ssl").lower() == "true":
        self.hive_use_ssl = True
    else:
        self.hive_use_ssl = False

    self.hive_min_buckets = int(configuration.get("Hive", "min_buckets"))
    self.hive_max_buckets = int(configuration.get("Hive", "max_buckets"))

    # HDFS settings
    self.hdfs_address = self.common_config.getConfigValue(key="hdfs_address")
    self.hdfs_basedir = self.common_config.getConfigValue(key="hdfs_basedir")
    self.hdfs_blocksize = self.common_config.getConfigValue(key="hdfs_blocksize")

    self.hiveConnectStr = configuration.get("Hive", "hive_metastore_alchemy_conn")

    try:
        self.hiveMetaDB = sa.create_engine(self.hiveConnectStr, echo=self.debugLogLevel)
        self.hiveMetaDB.connect()
        self.hiveMetaSession = sessionmaker(bind=self.hiveMetaDB)
    except sa.exc.OperationalError as err:
        logging.error("%s" % err)
        self.common_config.remove_temporary_files()
        sys.exit(1)
    except:
        print("Unexpected error: ")
        print(sys.exc_info())
        self.common_config.remove_temporary_files()
        sys.exit(1)

    logging.debug("Executing common_operations.__init__() - Finished")
def __init__(self): logging.debug("Executing rest.__init__()") self.headers = {'Content-type': 'application/json'} self.RESTendpoint = configuration.get("REST_statistics", "rest_endpoint") self.RESTtimeout = configuration.get("REST_statistics", "timeout") if self.RESTendpoint == "": logging.error( "Cant find the REST endpoint. Please check configuration file") sys.exit(1) if self.RESTtimeout == "": logging.error( "Cant find the REST endpoint timeout. Please check configuration file" ) sys.exit(1)
def __init__(self, mysql_conn, Hive_DB, Hive_Table):
    logging.debug("Executing stage.__init__()")

    self.Hive_DB = Hive_DB
    self.Hive_Table = Hive_Table
    self.mysql_conn = mysql_conn
    self.mysql_cursor = self.mysql_conn.cursor(buffered=False)
    self.currentStage = None
    self.memoryStage = False
    self.stageTimeStart = None
    self.stageTimeStop = None
    self.stageDurationStart = float()
    self.stageDurationStop = float()
    self.stageDurationTime = float()

    if configuration.get("REST_statistics", "post_import_data").lower() == "true":
        self.post_import_data = True
    else:
        self.post_import_data = False

    self.rest = rest.restInterface()
def __init__(self): logging.debug("Executing database.__init__()") self.mysql_conn = None self.mysql_cursor = None self.debugLogLevel = False if logging.root.level == 10: # DEBUG self.debugLogLevel = True try: DBImport_Home = os.environ['DBIMPORT_HOME'] except KeyError: print( "Error: System Environment Variable DBIMPORT_HOME is not set") # self.remove_temporary_files() sys.exit(1) # Fetch configuration about MySQL database and how to connect to it self.configHostname = configuration.get("Database", "mysql_hostname") self.configPort = configuration.get("Database", "mysql_port") self.configDatabase = configuration.get("Database", "mysql_database") self.configUsername = configuration.get("Database", "mysql_username") self.configPassword = configuration.get("Database", "mysql_password") # Esablish a SQLAlchemy connection to the DBImport database # try: self.connectStr = "mysql+pymysql://%s:%s@%s:%s/%s" % ( self.configUsername, self.configPassword, self.configHostname, self.configPort, self.configDatabase) try: self.configDB = sa.create_engine(self.connectStr, echo=self.debugLogLevel) self.configDB.connect() self.configDBSession = sessionmaker(bind=self.configDB) except sa.exc.OperationalError as err: logging.error("%s" % err) sys.exit(1) except: print("Unexpected error: ") print(sys.exc_info()) sys.exit(1) # Setup configuration for Alembic self.alembicSchemaDir = DBImport_Home + '/bin/SchemaUpgrade' self.alembicConfig = Config() self.alembicConfig.set_main_option('script_location', self.alembicSchemaDir) self.alembicConfig.set_main_option('sqlalchemy.url', self.connectStr) # Esablish a connection to the DBImport database in MySQL try: self.mysql_conn = mysql.connector.connect( host=self.configHostname, port=self.configPort, database=self.configDatabase, user=self.configUsername, password=self.configPassword) except mysql.connector.errors.ProgrammingError as err: logging.error("%s" % err) # self.remove_temporary_files() sys.exit(1) except mysql.connector.Error as err: if err.errno == errorcode.ER_ACCESS_DENIED_ERROR: logging.error( "Something is wrong with your user name or password") elif err.errno == errorcode.ER_BAD_DB_ERROR: logging.error("Database does not exist") else: logging.error("%s" % err) logging.error( "Error: There was a problem connecting to the MySQL database. Please check configuration and serverstatus and try again" ) # self.remove_temporary_files() sys.exit(1) else: self.mysql_cursor = self.mysql_conn.cursor(buffered=False) logging.debug("Executing database.__init__() - Finished")
def __init__(self): logging.debug("Executing rest.__init__()") self.mysql_conn = None self.mysql_cursor_01 = None self.mysql_cursor_02 = None self.RESTendpoint = configuration.get("REST_statistics", "rest_endpoint") if self.RESTendpoint == "": logging.error( "Cant find the REST endpoint. Please check configuration file") sys.exit(1) # Fetch configuration about MySQL database and how to connect to it mysql_hostname = configuration.get("Database", "mysql_hostname") mysql_port = configuration.get("Database", "mysql_port") mysql_database = configuration.get("Database", "mysql_database") mysql_username = configuration.get("Database", "mysql_username") mysql_password = configuration.get("Database", "mysql_password") # Esablish a connection to the DBImport database in MySQL try: self.mysql_conn = mysql.connector.connect(host=mysql_hostname, port=mysql_port, database=mysql_database, user=mysql_username, password=mysql_password) except mysql.connector.Error as err: if err.errno == errorcode.ER_ACCESS_DENIED_ERROR: logging.error( "Something is wrong with your user name or password") elif err.errno == errorcode.ER_BAD_DB_ERROR: logging.error("Database does not exist") else: logging.error("%s" % err) logging.error( "Error: There was a problem connecting to the MySQL database. Please check configuration and serverstatus and try again" ) self.remove_temporary_files() sys.exit(1) else: self.mysql_cursor_01 = self.mysql_conn.cursor(buffered=False) self.mysql_cursor_02 = self.mysql_conn.cursor(buffered=False) rest = restInterface() query = "select id, jsondata from json_to_rest" self.mysql_cursor_01.execute(query) logging.debug("SQL Statement executed: %s" % (self.mysql_cursor_01.statement)) successCounter = 0 errorCounter = 0 for row in self.mysql_cursor_01.fetchall(): jsonID = row[0] jsonData = row[1] response_code = -1 response_code = rest.sendData(jsonData) if response_code == 200: query = "delete from json_to_rest where id = %s" self.mysql_cursor_02.execute(query, (jsonID, )) logging.debug("SQL Statement executed: %s" % (self.mysql_cursor_02.statement)) self.mysql_conn.commit() successCounter += 1 else: errorCounter += 1 logging.info("Transmitted %s JSON documents to %s" % (successCounter, self.RESTendpoint)) if errorCounter > 0: logging.error("%s errors encountered" % (errorCounter)) self.mysql_conn.close()
def run(self):
    log = logging.getLogger(self.loggerName)
    log.info("distCP %s started" % (self.name))

    yarnQueue = configuration.get("Server", "distCP_yarnqueue")

    while not self.threadStopEvent.isSet():
        distCPrequest = self.distCPreqQueue.get()
        if distCPrequest is None:
            time.sleep(1)
            break

        tableID = distCPrequest.get('tableID')
        hiveDB = distCPrequest.get('hiveDB')
        hiveTable = distCPrequest.get('hiveTable')
        destination = distCPrequest.get('destination')
        failures = distCPrequest.get('failures')
        HDFSsourcePath = distCPrequest.get('HDFSsourcePath')
        HDFStargetPath = distCPrequest.get('HDFStargetPath')

        log.info("Thread %s: Starting a new distCP copy with the following parameters" % (self.name))
        log.info("Thread %s: --------------------------------------------------------" % (self.name))
        log.info("Thread %s: tableID = %s" % (self.name, tableID))
        log.info("Thread %s: hiveDB = %s" % (self.name, hiveDB))
        log.info("Thread %s: hiveTable = %s" % (self.name, hiveTable))
        log.info("Thread %s: destination = %s" % (self.name, destination))
        log.info("Thread %s: HDFSsourcePath = %s" % (self.name, HDFSsourcePath))
        log.info("Thread %s: HDFStargetPath = %s" % (self.name, HDFStargetPath))
        log.info("Thread %s: --------------------------------------------------------" % (self.name))

        distcpCommand = ["hadoop", "distcp",
                         "-D", "yarn.timeline-service.enabled=false",
                         "-D", "mapreduce.job.queuename=%s" % (yarnQueue),
                         "-overwrite", "-delete",
                         "%s" % (HDFSsourcePath),
                         "%s" % (HDFStargetPath)]

        log.info("Thread %s:  ______________________ " % (self.name))
        log.info("Thread %s: |                      |" % (self.name))
        log.info("Thread %s: | Hadoop distCp starts |" % (self.name))
        log.info("Thread %s: |______________________|" % (self.name))
        log.info("Thread %s: " % (self.name))

        # Start distcp
        sh_session = subprocess.Popen(distcpCommand, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        distCPoutput = ""

        # Print stdout and stderr while distcp is running
        while sh_session.poll() == None:
            row = sh_session.stdout.readline().decode('utf-8').rstrip()
            if row != "":
                log.info("Thread %s: %s" % (self.name, row))
                distCPoutput += row + "\n"
                sys.stdout.flush()

        # Print what is left in the output after distcp has finished
        for row in sh_session.stdout.readlines():
            row = row.decode('utf-8').rstrip()
            if row != "":
                log.info("Thread %s: %s" % (self.name, row))
                distCPoutput += row + "\n"
                sys.stdout.flush()

        log.info("Thread %s:  _________________________ " % (self.name))
        log.info("Thread %s: |                         |" % (self.name))
        log.info("Thread %s: | Hadoop distCp completed |" % (self.name))
        log.info("Thread %s: |_________________________|" % (self.name))
        log.info("Thread %s: " % (self.name))

        distCPresult = False
        if " ERROR " in distCPoutput:
            log.error("Thread %s: ERROR detected during distCP copy." % (self.name))
            failures = failures + 1
        elif " completed successfully" in distCPoutput:
            distCPresult = True
            failures = 0
        else:
            log.error("Thread %s: Unknown status of distCP. Marked as failure as it can't be verified that the copy finished successfully" % (self.name))
            failures = failures + 1

        distCPresponse = {}
        distCPresponse["tableID"] = tableID
        distCPresponse["hiveDB"] = hiveDB
        distCPresponse["hiveTable"] = hiveTable
        distCPresponse["destination"] = destination
        distCPresponse["result"] = distCPresult
        distCPresponse["failures"] = failures

        self.distCPresQueue.put(distCPresponse)

    log.info("distCP %s stopped" % (self.name))
def run(self):
    # This is the main event loop where the 'real' daemon work happens
    log = logging.getLogger("server")
    log.debug("Executing daemon.serverDaemon.run()")
    log.info("Server initializing")

    self.mysql_conn = None
    self.mysql_cursor = None
    self.debugLogLevel = False

    if logging.root.level == 10:        # DEBUG
        self.debugLogLevel = True

    self.common_config = common_config.config()

    self.crypto = self.common_config.crypto
    self.crypto.setPrivateKeyFile(configuration.get("Credentials", "private_key"))
    self.crypto.setPublicKeyFile(configuration.get("Credentials", "public_key"))

    self.remoteDBImportEngines = {}
    self.remoteDBImportSessions = {}
    self.remoteInstanceConfigDB = None
    self.configDBSession = None
    self.configDBEngine = None
    self.distCPreqQueue = Queue()
    self.distCPresQueue = Queue()
    self.threadStopEvent = threading.Event()

    # Start the Atlas Discovery Thread
    self.atlasDiscoveryThread = atlasDiscovery.atlasDiscovery(self.threadStopEvent)
    self.atlasDiscoveryThread.daemon = True
    self.atlasDiscoveryThread.start()

    # Start the REST Server Thread
    self.restServerThread = restServer.restServer(self.threadStopEvent)
    self.restServerThread.daemon = True
    self.restServerThread.start()

    # Start the distCP threads
    if configuration.get("Server", "distCP_separate_logs").lower() == "true":
        distCP_separate_logs = True
    else:
        distCP_separate_logs = False

    distCPobjects = []
    distCPthreads = int(configuration.get("Server", "distCP_threads"))
    if distCPthreads == 0:
        log.error("'distCP_threads' configuration in the config file must be larger than 0")
        sys.exit(1)

    log.info("Starting %s distCp threads" % (distCPthreads))
    for threadID in range(0, distCPthreads):
        if distCP_separate_logs == False:
            distCPlogName = "distCP"
        else:
            distCPlogName = "distCP-thread%s" % (str(threadID))

        thread = distCP(name=str(threadID),
                        distCPreqQueue=self.distCPreqQueue,
                        distCPresQueue=self.distCPresQueue,
                        threadStopEvent=self.threadStopEvent,
                        loggerName=distCPlogName)
        thread.daemon = True
        thread.start()
        distCPobjects.append(thread)

    # Fetch configuration about the MySQL database and how to connect to it
    self.configHostname = configuration.get("Database", "mysql_hostname")
    self.configPort = configuration.get("Database", "mysql_port")
    self.configDatabase = configuration.get("Database", "mysql_database")
    self.configUsername = configuration.get("Database", "mysql_username")
    self.configPassword = configuration.get("Database", "mysql_password")

    # Set all rows that have copy_status = 1 to 0. This is needed during startup because if they are 1 at this stage,
    # a previous server marked them as 1 but didn't finish the copy. We need to retry that copy here and now
    try:
        updateDict = {}
        updateDict["last_status_update"] = str(datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f'))
        updateDict["copy_status"] = 0

        session = self.getDBImportSession()
        (session.query(configSchema.copyASyncStatus)
            .filter(configSchema.copyASyncStatus.copy_status == 1)
            .update(updateDict))
        session.commit()
        session.close()

        log.debug("Init part of daemon.serverDaemon.run() completed")
        log.info("Server startup completed")

    except SQLAlchemyError as e:
        log.error(str(e.__dict__['orig']))
        log.error("Server startup failed")
        self.disconnectDBImportDB()
        # As we require this operation to complete successfully before entering the main loop, we exit if there is a problem
        self.common_config.remove_temporary_files()
        sys.exit(1)

    except SQLerror:
        log.error("Server startup failed. Can't connect to the config database")
        self.disconnectDBImportDB()
        self.common_config.remove_temporary_files()
        sys.exit(1)

    importTables = aliased(configSchema.importTables)
    dbimportInstances = aliased(configSchema.dbimportInstances)
    copyASyncStatus = aliased(configSchema.copyASyncStatus)

    while True:
        # ***********************************
        # Main Loop for server
        # ***********************************

        try:
            session = self.getDBImportSession()

            # status 0 = New data from import
            # status 1 = Data sent to distCP thread
            # status 2 = Data returned from distCP and was a failure
            # status 3 = Data returned from distCP and was a success

            # ------------------------------------------
            # Fetch all rows from copyASyncStatus that have status 0 and send them to the distCP threads
            # ------------------------------------------

            # TODO: make the 1 min interval a configured param
            status2checkTimestamp = (datetime.now() - timedelta(minutes=1)).strftime('%Y-%m-%d %H:%M:%S.%f')

            aSyncRow = pd.DataFrame(session.query(
                    copyASyncStatus.table_id,
                    copyASyncStatus.hive_db,
                    copyASyncStatus.hive_table,
                    copyASyncStatus.destination,
                    copyASyncStatus.failures,
                    copyASyncStatus.hdfs_source_path,
                    copyASyncStatus.hdfs_target_path)
                .select_from(copyASyncStatus)
                .filter((copyASyncStatus.copy_status == 0) |
                        ((copyASyncStatus.copy_status == 2) & (copyASyncStatus.last_status_update <= status2checkTimestamp)))
                .all())

            for index, row in aSyncRow.iterrows():
                tableID = row['table_id']
                destination = row['destination']
                hiveDB = row['hive_db']
                hiveTable = row['hive_table']
                failures = row['failures']
                HDFSsourcePath = row['hdfs_source_path']
                HDFStargetPath = row['hdfs_target_path']

                log.info("New sync request for table %s.%s" % (hiveDB, hiveTable))

                updateDict = {}
                updateDict["copy_status"] = 1
                updateDict["last_status_update"] = str(datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f'))

                (session.query(configSchema.copyASyncStatus)
                    .filter(configSchema.copyASyncStatus.table_id == tableID)
                    .filter(configSchema.copyASyncStatus.destination == destination)
                    .update(updateDict))
                session.commit()

                distCPrequest = {}
                distCPrequest["tableID"] = tableID
                distCPrequest["hiveDB"] = hiveDB
                distCPrequest["hiveTable"] = hiveTable
                distCPrequest["destination"] = destination
                distCPrequest["failures"] = failures
                distCPrequest["HDFSsourcePath"] = HDFSsourcePath
                distCPrequest["HDFStargetPath"] = HDFStargetPath
                self.distCPreqQueue.put(distCPrequest)

                log.debug("Status changed to 1 for table %s.%s and sent to distCP threads" % (hiveDB, hiveTable))

            session.close()

        except SQLAlchemyError as e:
            log.error(str(e.__dict__['orig']))
            session.rollback()
            self.disconnectDBImportDB()

        except SQLerror:
            self.disconnectDBImportDB()

        # ------------------------------------------
        # Read the response from the distCP threads
        # ------------------------------------------

        try:
            distCPresponse = self.distCPresQueue.get(block=False)
        except Empty:
            pass
        else:
            updateDict = {}
            updateDict["last_status_update"] = str(datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f'))
            updateDict["failures"] = distCPresponse.get("failures")

            distCPresult = distCPresponse.get("result")
            if distCPresult == True:
                updateDict["copy_status"] = 3
            else:
                updateDict["copy_status"] = 2

            try:
                session = self.getDBImportSession()
                (session.query(configSchema.copyASyncStatus)
                    .filter(configSchema.copyASyncStatus.table_id == distCPresponse.get('tableID'))
                    .filter(configSchema.copyASyncStatus.destination == distCPresponse.get('destination'))
                    .update(updateDict))
                session.commit()
                session.close()

            except SQLAlchemyError as e:
                log.error(str(e.__dict__['orig']))
                session.rollback()
                self.disconnectDBImportDB()

            except SQLerror:
                self.disconnectDBImportDB()

        # ------------------------------------------
        # Fetch all rows from copyASyncStatus that have status 3 and update the remote DBImport instance database
        # Also delete the record from the copyASyncStatus table
        # ------------------------------------------

        try:
            session = self.getDBImportSession()
            aSyncRow = pd.DataFrame(session.query(
                    copyASyncStatus.table_id,
                    copyASyncStatus.hive_db,
                    copyASyncStatus.hive_table,
                    copyASyncStatus.destination,
                    copyASyncStatus.failures,
                    copyASyncStatus.hdfs_source_path,
                    copyASyncStatus.hdfs_target_path)
                .select_from(copyASyncStatus)
                .filter(copyASyncStatus.copy_status == 3)
                .all())
            session.close()

        except SQLAlchemyError as e:
            log.error(str(e.__dict__['orig']))
            session.rollback()
            self.disconnectDBImportDB()

        except SQLerror:
            self.disconnectDBImportDB()

        else:
            for index, row in aSyncRow.iterrows():
                tableID = row['table_id']
                destination = row['destination']
                hiveDB = row['hive_db']
                hiveTable = row['hive_table']
                failures = row['failures']
                HDFSsourcePath = row['hdfs_source_path']
                HDFStargetPath = row['hdfs_target_path']

                # Get the remote session. If the session is not available, we just continue to the next item in the database
                _remoteSession = self.getDBImportRemoteSession(destination)
                if _remoteSession == None:
                    continue

                try:
                    remoteSession = _remoteSession()

                    # Get the table_id from the table at the remote instance
                    remoteImportTableID = (remoteSession.query(importTables.table_id)
                        .select_from(importTables)
                        .filter(importTables.hive_db == hiveDB)
                        .filter(importTables.hive_table == hiveTable)
                        .one())
                    remoteTableID = remoteImportTableID[0]

                    updateDict = {}
                    updateDict["copy_finished"] = str(datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f'))

                    # Update the values in import_table on the remote instance
                    (remoteSession.query(configSchema.importTables)
                        .filter(configSchema.importTables.table_id == remoteTableID)
                        .update(updateDict))
                    remoteSession.commit()
                    remoteSession.close()

                except SQLAlchemyError as e:
                    log.error(str(e.__dict__['orig']))
                    remoteSession.rollback()
                    self.disconnectRemoteSession(destination)

                else:
                    # Delete the record from copyASyncStatus
                    try:
                        session = self.getDBImportSession()
                        (session.query(configSchema.copyASyncStatus)
                            .filter(configSchema.copyASyncStatus.table_id == tableID)
                            .filter(configSchema.copyASyncStatus.destination == destination)
                            .delete())
                        session.commit()
                        session.close()

                    except SQLAlchemyError as e:
                        log.error(str(e.__dict__['orig']))
                        session.rollback()
                        self.disconnectDBImportDB()

                    except SQLerror:
                        self.disconnectDBImportDB()

                    else:
                        log.info("Table %s.%s copied successfully to '%s'" % (hiveDB, hiveTable, destination))

            session.close()

        # log.info("Starting wait")
        time.sleep(1)

    log.info("Server stopped")
    log.debug("Executing daemon.serverDaemon.run() - Finished")
def run(self): logger = "atlasDiscovery" log = logging.getLogger(logger) self.mysql_conn = None self.mysql_cursor = None self.configDBSession = None self.configDBEngine = None self.debugLogLevel = False atlasEnabled = True # self.atlasOperation = atlas_operations.atlasOperation(logger) if logging.root.level == 10: # DEBUG self.debugLogLevel = True self.atlasCrawlerProcessQueue = Queue() self.atlasCrawlerResultQueue = Queue() self.jdbcConnectionMutex = threading.Lock() # Fetch configuration about MySQL database and how to connect to it self.configHostname = configuration.get("Database", "mysql_hostname") self.configPort = configuration.get("Database", "mysql_port") self.configDatabase = configuration.get("Database", "mysql_database") self.configUsername = configuration.get("Database", "mysql_username") self.configPassword = configuration.get("Database", "mysql_password") atlasCrawlerObjects = [] atlasCrawlerThreads = int(configuration.get("Server", "atlas_threads")) if atlasCrawlerThreads == 0: log.info( "Atlas discovery disabled as the number of threads is set to 0" ) atlasEnabled = False else: log.info("Starting %s Atlas crawler threads" % (atlasCrawlerThreads)) for threadID in range(0, atlasCrawlerThreads): # if distCP_separate_logs == False: atlasCrawlerLogName = "atlasCrawler-thread%s" % (str(threadID)) # else: # distCPlogName = "distCP-thread%s"%(str(threadID)) thread = atlasCrawler( name=str(threadID), atlasCrawlerProcessQueue=self.atlasCrawlerProcessQueue, atlasCrawlerResultQueue=self.atlasCrawlerResultQueue, threadStopEvent=self.threadStopEvent, loggerName=atlasCrawlerLogName, mutex=self.jdbcConnectionMutex) thread.daemon = True thread.start() atlasCrawlerObjects.append(thread) self.common_config = common_config.config() jdbcConnections = aliased(configSchema.jdbcConnections) self.failureLog = {} self.connectionsSentToCrawlers = [] # The interval between the scans. This is in hours atlasDiscoveryInterval = self.common_config.getConfigValue( key="atlas_discovery_interval") # if atlasEnabled == True: # atlasEnabled = self.atlasOperation.checkAtlasSchema(logger=logger) if atlasEnabled == True: log.info("atlasDiscovery started") log.info("Atlas discovery interval is set to %s hours" % (atlasDiscoveryInterval)) while not self.threadStopEvent.isSet() and atlasEnabled == True: # **************************************************************** # Read data from jdbc_connection and put in queue for processing # **************************************************************** if self.atlasCrawlerProcessQueue.qsize() < atlasCrawlerThreads: # Only read the database if there isn't enough items in the queue to the crawlers to processes. 
This will save # a large number of sql requests if the queue is full try: # Read a list of connection aliases that we are going to process in this iteration session = self.getDBImportSession() atlasDiscoveryCheckTime = datetime.utcnow() - timedelta( hours=atlasDiscoveryInterval) # TODO: Antagligen bara köra denna om jdbcConnectionsDf är tom från föregående körning jdbcConnectionsDf = pd.DataFrame( session.query( jdbcConnections.dbalias, jdbcConnections.atlas_last_discovery, jdbcConnections.atlas_discovery, jdbcConnections.contact_info, jdbcConnections.description, jdbcConnections.owner, jdbcConnections.atlas_include_filter, jdbcConnections.atlas_exclude_filter).select_from( jdbcConnections).filter( jdbcConnections.atlas_discovery == 1) # .filter((jdbcConnections.atlas_last_discovery < atlasDiscoveryCheckTime) | (jdbcConnections.atlas_last_discovery == None)) .order_by(jdbcConnections.atlas_last_discovery).all()) session.close() except SQLAlchemyError as e: log.error(str(e.__dict__['orig'])) session.rollback() self.disconnectDBImportDB() else: for index, row in jdbcConnectionsDf.iterrows(): dbAlias = row['dbalias'] # TODO: Flytta denna till thread if self.common_config.checkTimeWindow( dbAlias, atlasDiscoveryMode=True) == False: continue # Find out if the dbAlias is blacklisted if self.isConnectionBlacklisted(dbAlias) == True: continue if dbAlias in self.connectionsSentToCrawlers: # log.warning("This connection is already being processed. Skipping....") continue altasOperationFailed = False printBlackListWarning = True self.common_config.mysql_conn.commit() try: self.common_config.lookupConnectionAlias(dbAlias) except invalidConfiguration as err: if self.common_config.atlasJdbcSourceSupport == True: log.error( "Connection '%s' have invalid configuration. Failed with '%s'" % (dbAlias, err)) altasOperationFailed = True if self.common_config.atlasJdbcSourceSupport == False: # This source type does not support Atlas discovery log.debug( "Connection '%s' does not support Atlas discovery. Skipping..." 
% (dbAlias)) altasOperationFailed = True printBlackListWarning = False # Start the Jpype JVM as that needs to be running before the crawlers starts to use it if jpype.isJVMStarted() == False: log.debug("Starting jpype JVM") self.common_config.connectToJDBC( allJarFiles=True, exitIfFailure=False, logger=logger, printError=False) self.common_config.disconnectFromJDBC() # if altasOperationFailed == False and self.common_config.connectToJDBC(allJarFiles=True, exitIfFailure=False, logger=logger) == True: if altasOperationFailed == False: # self.common_config.atlasEnabled = True self.connectionsSentToCrawlers.append(dbAlias) log.debug("Sending alias '%s' to queue" % (dbAlias)) atlasCrawlerRequest = {} atlasCrawlerRequest["dbAlias"] = row['dbalias'] atlasCrawlerRequest["contactInfo"] = row[ 'contact_info'] atlasCrawlerRequest["description"] = row[ 'description'] atlasCrawlerRequest["owner"] = row['owner'] atlasCrawlerRequest["atlasIncludeFilter"] = row[ 'atlas_include_filter'] atlasCrawlerRequest["atlasExcludeFilter"] = row[ 'atlas_exclude_filter'] atlasCrawlerRequest[ "jdbc_hostname"] = self.common_config.jdbc_hostname atlasCrawlerRequest[ "jdbc_port"] = self.common_config.jdbc_port atlasCrawlerRequest[ "jdbc_servertype"] = self.common_config.jdbc_servertype atlasCrawlerRequest[ "jdbc_database"] = self.common_config.jdbc_database atlasCrawlerRequest[ "jdbc_oracle_sid"] = self.common_config.jdbc_oracle_sid atlasCrawlerRequest[ "jdbc_oracle_servicename"] = self.common_config.jdbc_oracle_servicename atlasCrawlerRequest[ "jdbc_username"] = self.common_config.jdbc_username atlasCrawlerRequest[ "jdbc_password"] = self.common_config.jdbc_password atlasCrawlerRequest[ "jdbc_driver"] = self.common_config.jdbc_driver atlasCrawlerRequest[ "jdbc_url"] = self.common_config.jdbc_url atlasCrawlerRequest[ "jdbc_classpath_for_python"] = self.common_config.jdbc_classpath_for_python atlasCrawlerRequest[ "jdbc_environment"] = self.common_config.jdbc_environment atlasCrawlerRequest["hdfs_address"] = None atlasCrawlerRequest["cluster_name"] = None self.atlasCrawlerProcessQueue.put( atlasCrawlerRequest) else: # altasOperationFailed = True # if altasOperationFailed == True: self.blacklistConnection(dbAlias, printBlackListWarning) # ******************************** # Read response from atlasCrawler # ******************************** try: atlasCrawlerResult = self.atlasCrawlerResultQueue.get( block=False, timeout=1) except Empty: atlasCrawlerResult = None if atlasCrawlerResult is not None: dbAlias = atlasCrawlerResult.get('dbAlias') result = atlasCrawlerResult.get('result') blacklist = atlasCrawlerResult.get('blacklist') log.debug("atlasCrawlerResultQueue: %s" % (atlasCrawlerResult)) self.connectionsSentToCrawlers.remove(dbAlias) if result == True: updateDict = {} updateDict["atlas_last_discovery"] = str( datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')) try: session = self.getDBImportSession() (session.query(configSchema.jdbcConnections).filter( configSchema.jdbcConnections.dbalias == dbAlias).update(updateDict)) session.commit() session.close() except SQLAlchemyError as e: log.error(str(e.__dict__['orig'])) session.rollback() self.disconnectDBImportDB() else: self.removeBlacklist(dbAlias) else: if blacklist == True: log.error( "Connection '%s' failed during crawling of database schema" % (dbAlias)) self.blacklistConnection(dbAlias) else: log.warning( "A Warning was detected when crawling connection '%s'. 
It will not be marked as completed and will retry the operation" % (dbAlias)) time.sleep(1) self.disconnectDBImportDB() if atlasEnabled == True: log.info("atlasDiscovery stopped")
def run(self): logger = "atlasDiscovery" log = logging.getLogger(logger) # log.info("atlasDiscovery started") self.mysql_conn = None self.mysql_cursor = None self.configDBSession = None self.configDBEngine = None self.debugLogLevel = False if logging.root.level == 10: # DEBUG self.debugLogLevel = True # Fetch configuration about MySQL database and how to connect to it self.configHostname = configuration.get("Database", "mysql_hostname") self.configPort = configuration.get("Database", "mysql_port") self.configDatabase = configuration.get("Database", "mysql_database") self.configUsername = configuration.get("Database", "mysql_username") self.configPassword = configuration.get("Database", "mysql_password") self.common_config = common_config.config() jdbcConnections = aliased(configSchema.jdbcConnections) failureLog = {} # The interval between the scans. This is in hours atlasDiscoveryInterval = self.common_config.getConfigValue( key="atlas_discovery_interval") atlasEnabled = self.common_config.checkAtlasSchema(logger=logger) if atlasEnabled == True: log.info("atlasDiscovery started") log.info("Atlas discovery interval is set to %s hours" % (atlasDiscoveryInterval)) while not self.threadStopEvent.isSet() and atlasEnabled == True: try: session = self.getDBImportSession() atlasDiscoveryCheckTime = datetime.utcnow() - timedelta( hours=atlasDiscoveryInterval) jdbcConnectionsDf = pd.DataFrame( session.query(jdbcConnections.dbalias, jdbcConnections.timewindow_start, jdbcConnections.timewindow_stop, jdbcConnections.atlas_last_discovery, jdbcConnections.atlas_discovery). select_from(jdbcConnections).filter( jdbcConnections.atlas_discovery == 1).filter( (jdbcConnections.atlas_last_discovery < atlasDiscoveryCheckTime) | (jdbcConnections.atlas_last_discovery == None)). 
order_by(jdbcConnections.atlas_last_discovery).all()) session.close() except SQLAlchemyError as e: log.error(str(e.__dict__['orig'])) session.rollback() self.disconnectDBImportDB() else: for index, row in jdbcConnectionsDf.iterrows(): dbAlias = row['dbalias'] currentTime = str(datetime.now().strftime('%H:%M:%S')) timeWindowStart = None timeWindowStop = None dbAliasAllowedAtThisTime = False if row['timewindow_start'] != None: timeWindowStart = str(row['timewindow_start']) if row['timewindow_stop'] != None: timeWindowStop = str(row['timewindow_stop']) if timeWindowStart != None and re.search( '^[0-9]:', timeWindowStart): timeWindowStart = "0" + timeWindowStart if timeWindowStop != None and re.search( '^[0-9]:', timeWindowStop): timeWindowStop = "0" + timeWindowStop if timeWindowStart == None and timeWindowStop == None: dbAliasAllowedAtThisTime = True elif currentTime > timeWindowStart and currentTime < timeWindowStop: dbAliasAllowedAtThisTime = True # Find out if the dbAlias is blacklisted if failureLog.get(dbAlias, None) != None: blackListEnableTime = failureLog[dbAlias][ 'blackListStart'] + timedelta( hours=failureLog[dbAlias]['blackListTime']) if datetime.now() < blackListEnableTime: # This dbAlias is still blacklisted continue if dbAliasAllowedAtThisTime == False: # Not allowed to access this connection at this time continue self.common_config.mysql_conn.commit() self.common_config.lookupConnectionAlias(dbAlias) if self.common_config.atlasJdbcSourceSupport == False: # This source type does not support Atlas discovery continue # We now have a valid connection in dbAlias that we can do a discovery on log.info("Starting a Atlas discovery on connection '%s'" % (dbAlias)) altasOperationFailed = False if self.common_config.connectToJDBC(allJarFiles=True, exitIfFailure=False, logger=logger) == True: self.common_config.atlasEnabled = True response = self.common_config.discoverAtlasRdbms( dbAlias=dbAlias, logger=logger) if response == False: # Something went wrong when getting source system schema altasOperationFailed = True log.warning( "There was an error/warning when discovering source schema" ) else: log.info( "Finished Atlas discovery on connection '%s'" % (dbAlias)) self.common_config.disconnectFromJDBC() if altasOperationFailed == False: updateDict = {} updateDict["atlas_last_discovery"] = str( datetime.now().strftime( '%Y-%m-%d %H:%M:%S.%f')) try: session = self.getDBImportSession() (session.query( configSchema.jdbcConnections).filter( configSchema.jdbcConnections.dbalias == dbAlias).update(updateDict)) session.commit() session.close() except SQLAlchemyError as e: log.error(str(e.__dict__['orig'])) session.rollback() self.disconnectDBImportDB() else: failureLog.pop(dbAlias, None) break else: altasOperationFailed = True if altasOperationFailed == True: # Connection failed. We need to blacklist this connection for some time blackListData = failureLog.get(dbAlias, None) if blackListData == None: blackListTime = 1 else: blackListTime = failureLog[dbAlias][ 'blackListTime'] * 2 # Max blacklist time is 24 hours if blackListTime > 24: blackListTime = 24 failureLog[dbAlias] = { 'blackListTime': blackListTime, 'blackListStart': datetime.now() } log.warning( "Atlas Discovery failed on connection '%s'" % (dbAlias)) log.warning( "This connection is now blacklisted for %s hours" % (failureLog[dbAlias]['blackListTime'])) time.sleep(1) self.disconnectDBImportDB() if atlasEnabled == True: log.info("atlasDiscovery stopped")