def __init__(self):
    """Load REST and Kafka settings from the DBImport configuration.

    Exits the process with code 1 if the REST endpoint URL or its
    timeout is missing from the configuration.
    """
    logging.debug("Executing rest.__init__()")

    self.common_config = common_config.config()

    # Header sent with every REST request issued by this class
    self.RESTheaders = {'Content-type': 'application/json'}

    # REST endpoint settings
    self.RESTendpoint = self.common_config.getConfigValue(key="rest_url")
    self.RESTtimeout = self.common_config.getConfigValue(key="rest_timeout")
    self.RESTverifySSL = self.common_config.getConfigValue(key="rest_verifyssl")
    self.RESTtrustCAfile = self.common_config.getConfigValue(key="rest_trustcafile")

    # Kafka settings (read here alongside REST; presumably used as an
    # alternative transport by this class — TODO confirm against callers)
    self.kafkaBrokers = self.common_config.getConfigValue(key="kafka_brokers")
    self.kafkaTrustCAFile = self.common_config.getConfigValue(key="kafka_trustcafile")
    self.kafkaSecurityProtocol = self.common_config.getConfigValue(key="kafka_securityprotocol")
    self.kafkaSaslMechanism = self.common_config.getConfigValue(key="kafka_saslmechanism")
    self.kafkaTopic = self.common_config.getConfigValue(key="kafka_topic")

    # The REST endpoint and its timeout are mandatory configuration values
    if self.RESTendpoint == "":
        logging.error(
            "Cant find the REST endpoint. Please check configuration file")
        sys.exit(1)

    if self.RESTtimeout == "":
        logging.error(
            "Cant find the REST endpoint timeout. Please check configuration file"
        )
        sys.exit(1)
def __init__(self, Hive_DB=None, Hive_Table=None):
    """Initialize Hive operations: Kerberos auth, Hive server settings,
    HDFS settings and a SQLAlchemy engine for the Hive metastore.

    Args:
        Hive_DB: Optional Hive database name to operate on.
        Hive_Table: Optional Hive table name to operate on.

    Exits the process with code 1 if the Hive metastore database
    cannot be reached.
    """
    logging.debug("Executing common_operation.__init__()")

    self.Hive_DB = Hive_DB
    self.Hive_Table = Hive_Table
    self.hive_conn = None
    self.hive_cursor = None

    # Mirror the root logger's DEBUG state; also controls SQLAlchemy echo below
    self.debugLogLevel = logging.root.level == 10  # 10 == logging.DEBUG

    # Fetch and initialize the Kerberos configuration
    self.kerberosPrincipal = configuration.get("Kerberos", "principal")
    self.webHCatAuth = HTTPKerberosAuth(force_preemptive=True,
                                        principal=self.kerberosPrincipal)

    self.common_config = common_config.config()

    # Fetch configuration details about Hive
    self.hive_servers = configuration.get("Hive", "servers")
    self.hive_kerberos_service_name = configuration.get("Hive", "kerberos_service_name")
    self.hive_kerberos_realm = configuration.get("Hive", "kerberos_realm")
    self.hive_print_messages = self.common_config.getConfigValue(key="hive_print_messages")

    # Config value is a string; only the literal "true" (any case) enables SSL
    self.hive_use_ssl = configuration.get("Hive", "use_ssl").lower() == "true"

    self.hive_min_buckets = int(configuration.get("Hive", "min_buckets"))
    self.hive_max_buckets = int(configuration.get("Hive", "max_buckets"))

    # HDFS Settings
    self.hdfs_address = self.common_config.getConfigValue(key="hdfs_address")
    self.hdfs_basedir = self.common_config.getConfigValue(key="hdfs_basedir")
    self.hdfs_blocksize = self.common_config.getConfigValue(key="hdfs_blocksize")

    # Connect to the Hive metastore database. Startup cannot continue
    # without it, so any failure here is fatal.
    self.hiveConnectStr = configuration.get("Hive", "hive_metastore_alchemy_conn")
    try:
        self.hiveMetaDB = sa.create_engine(self.hiveConnectStr, echo=self.debugLogLevel)
        self.hiveMetaDB.connect()
        self.hiveMetaSession = sessionmaker(bind=self.hiveMetaDB)
    except sa.exc.OperationalError as err:
        logging.error("%s" % err)
        self.common_config.remove_temporary_files()
        sys.exit(1)
    except Exception:
        # Was a bare 'except:' — narrowed so KeyboardInterrupt/SystemExit
        # are no longer swallowed. Behavior otherwise unchanged.
        print("Unexpected error: ")
        print(sys.exc_info())
        self.common_config.remove_temporary_files()
        sys.exit(1)

    logging.debug("Executing common_operations.__init__() - Finished")
def __init__(self, mysql_conn, connectionAlias=None, targetSchema=None, targetTable=None):
    """Set up stage tracking for a target table.

    Args:
        mysql_conn: Open MySQL connection used for stage bookkeeping.
        connectionAlias: Optional alias of the source connection.
        targetSchema: Optional schema of the target table.
        targetTable: Optional name of the target table.
    """
    logging.debug("Executing stage.__init__()")

    # Database handles — an unbuffered cursor on the supplied connection
    self.mysql_conn = mysql_conn
    self.mysql_cursor = self.mysql_conn.cursor(buffered=False)

    # Identification of what this stage object tracks
    self.connectionAlias = connectionAlias
    self.targetSchema = targetSchema
    self.targetTable = targetTable

    # Stage bookkeeping — no stage is active yet
    self.currentStage = None
    self.memoryStage = False
    self.stageTimeStart = None
    self.stageTimeStop = None
    self.stageDurationStart = 0.0
    self.stageDurationStop = 0.0
    self.stageDurationTime = 0.0

    # Shared helper objects
    self.common_config = common_config.config()
    self.sendStatistics = sendStatistics.sendStatistics()
def run(self):
    """Main server daemon loop.

    Starts the Atlas discovery, REST server and distCp worker threads,
    resets half-finished copy jobs from a previous server run, and then
    loops forever dispatching copy requests to the distCp threads and
    propagating their results to the config DB and remote DBImport
    instances.
    """
    # This is the main event loop where the 'real' daemon work happens
    log = logging.getLogger("server")
    log.debug("Executing daemon.serverDaemon.run()")
    log.info("Server initializing")

    self.mysql_conn = None
    self.mysql_cursor = None
    self.debugLogLevel = False

    if logging.root.level == 10:  # DEBUG
        self.debugLogLevel = True

    self.common_config = common_config.config()

    # Crypto keys used for credential handling
    self.crypto = self.common_config.crypto
    self.crypto.setPrivateKeyFile(
        configuration.get("Credentials", "private_key"))
    self.crypto.setPublicKeyFile(
        configuration.get("Credentials", "public_key"))

    self.remoteDBImportEngines = {}
    self.remoteDBImportSessions = {}
    self.remoteInstanceConfigDB = None
    self.configDBSession = None
    self.configDBEngine = None
    # Work queues shared with the distCp worker threads
    self.distCPreqQueue = Queue()
    self.distCPresQueue = Queue()
    # Event used to signal all worker threads to stop
    self.threadStopEvent = threading.Event()

    # Start the Atlas Discovery Thread
    self.atlasDiscoveryThread = atlasDiscovery.atlasDiscovery(
        self.threadStopEvent)
    self.atlasDiscoveryThread.daemon = True
    self.atlasDiscoveryThread.start()

    # Start the REST Server Thread
    self.restServerThread = restServer.restServer(self.threadStopEvent)
    self.restServerThread.daemon = True
    self.restServerThread.start()

    # Start the distCP threads
    if configuration.get("Server", "distCP_separate_logs").lower() == "true":
        distCP_separate_logs = True
    else:
        distCP_separate_logs = False

    distCPobjects = []
    distCPthreads = int(configuration.get("Server", "distCP_threads"))
    if distCPthreads == 0:
        log.error(
            "'distCP_threads' configuration in configfile must be larger than 0"
        )
        sys.exit(1)

    log.info("Starting %s distCp threads" % (distCPthreads))

    for threadID in range(0, distCPthreads):
        # Either all threads share one logger name or each gets its own
        if distCP_separate_logs == False:
            distCPlogName = "distCP"
        else:
            distCPlogName = "distCP-thread%s" % (str(threadID))

        thread = distCP(name=str(threadID),
                        distCPreqQueue=self.distCPreqQueue,
                        distCPresQueue=self.distCPresQueue,
                        threadStopEvent=self.threadStopEvent,
                        loggerName=distCPlogName)
        thread.daemon = True
        thread.start()
        distCPobjects.append(thread)

    # Fetch configuration about MySQL database and how to connect to it
    self.configHostname = configuration.get("Database", "mysql_hostname")
    self.configPort = configuration.get("Database", "mysql_port")
    self.configDatabase = configuration.get("Database", "mysql_database")
    self.configUsername = configuration.get("Database", "mysql_username")
    self.configPassword = configuration.get("Database", "mysql_password")

    # Set all rows that have copy_status = 1 to 0. This is needed in the startup as if they are 1 in this stage, it means that a previous
    # server marked it as 1 but didnt finish the copy. We need to retry that copy here and now
    try:
        updateDict = {}
        updateDict["last_status_update"] = str(
            datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f'))
        updateDict["copy_status"] = 0

        session = self.getDBImportSession()
        (session.query(configSchema.copyASyncStatus)
            .filter(configSchema.copyASyncStatus.copy_status == 1)
            .update(updateDict))
        session.commit()
        session.close()

        log.debug("Init part of daemon.serverDaemon.run() completed")
        log.info("Server startup completed")

    except SQLAlchemyError as e:
        log.error(str(e.__dict__['orig']))
        log.error("Server startup failed")
        self.disconnectDBImportDB()
        # As we require this operation to be completed successful before entering the main loop, we will exit if there is a problem
        self.common_config.remove_temporary_files()
        sys.exit(1)
    except SQLerror:
        log.error("Server startup failed. Cant connect to config database")
        self.disconnectDBImportDB()
        self.common_config.remove_temporary_files()
        sys.exit(1)

    importTables = aliased(configSchema.importTables)
    dbimportInstances = aliased(configSchema.dbimportInstances)
    copyASyncStatus = aliased(configSchema.copyASyncStatus)

    while True:
        # ***********************************
        # Main Loop for server
        # ***********************************

        try:
            session = self.getDBImportSession()
            # copy_status values in copyASyncStatus:
            # status 0 = New data from import
            # status 1 = Data sent to distCP thread
            # status 2 = Data returned from distCP and was a failure
            # status 3 = Data returned from distCP and was a success

            # ------------------------------------------
            # Fetch all rows from copyASyncStatus that contains the status 0 and send them to distCP threads
            # (status-2 rows are retried once their last update is older than 1 minute)
            # ------------------------------------------

            # TODO: make the 1 min interval a configured param
            status2checkTimestamp = (
                datetime.now() - timedelta(minutes=1)).strftime('%Y-%m-%d %H:%M:%S.%f')

            aSyncRow = pd.DataFrame(
                session.query(
                    copyASyncStatus.table_id,
                    copyASyncStatus.hive_db,
                    copyASyncStatus.hive_table,
                    copyASyncStatus.destination,
                    copyASyncStatus.failures,
                    copyASyncStatus.hdfs_source_path,
                    copyASyncStatus.hdfs_target_path)
                .select_from(copyASyncStatus)
                .filter((copyASyncStatus.copy_status == 0) |
                        ((copyASyncStatus.copy_status == 2) &
                         (copyASyncStatus.last_status_update <= status2checkTimestamp)))
                .all())

            for index, row in aSyncRow.iterrows():
                tableID = row['table_id']
                destination = row['destination']
                hiveDB = row['hive_db']
                hiveTable = row['hive_table']
                failures = row['failures']
                HDFSsourcePath = row['hdfs_source_path']
                HDFStargetPath = row['hdfs_target_path']

                log.info("New sync request for table %s.%s" % (hiveDB, hiveTable))

                # Mark the row as "sent to distCP" before queueing it
                updateDict = {}
                updateDict["copy_status"] = 1
                updateDict["last_status_update"] = str(
                    datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f'))

                (session.query(configSchema.copyASyncStatus)
                    .filter(configSchema.copyASyncStatus.table_id == tableID)
                    .filter(configSchema.copyASyncStatus.destination == destination)
                    .update(updateDict))
                session.commit()

                # Hand the copy job over to a distCP worker thread
                distCPrequest = {}
                distCPrequest["tableID"] = tableID
                distCPrequest["hiveDB"] = hiveDB
                distCPrequest["hiveTable"] = hiveTable
                distCPrequest["destination"] = destination
                distCPrequest["failures"] = failures
                distCPrequest["HDFSsourcePath"] = HDFSsourcePath
                distCPrequest["HDFStargetPath"] = HDFStargetPath
                self.distCPreqQueue.put(distCPrequest)

                log.debug(
                    "Status changed to 1 for table %s.%s and sent to distCP threads"
                    % (hiveDB, hiveTable))

            session.close()

        except SQLAlchemyError as e:
            log.error(str(e.__dict__['orig']))
            session.rollback()
            self.disconnectDBImportDB()
        except SQLerror:
            self.disconnectDBImportDB()

        # ------------------------------------------
        # Read the response from the distCP threads
        # ------------------------------------------
        try:
            distCPresponse = self.distCPresQueue.get(block=False)
        except Empty:
            # No response available right now; nothing to do
            pass
        else:
            updateDict = {}
            updateDict["last_status_update"] = str(
                datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f'))
            updateDict["failures"] = distCPresponse.get("failures")

            distCPresult = distCPresponse.get("result")
            if distCPresult == True:
                updateDict["copy_status"] = 3
            else:
                updateDict["copy_status"] = 2

            try:
                session = self.getDBImportSession()
                (session.query(configSchema.copyASyncStatus)
                    .filter(configSchema.copyASyncStatus.table_id ==
                            distCPresponse.get('tableID'))
                    .filter(configSchema.copyASyncStatus.destination ==
                            distCPresponse.get('destination'))
                    .update(updateDict))
                session.commit()
                session.close()
            except SQLAlchemyError as e:
                log.error(str(e.__dict__['orig']))
                session.rollback()
                self.disconnectDBImportDB()
            except SQLerror:
                self.disconnectDBImportDB()

        # ------------------------------------------
        # Fetch all rows from copyASyncStatus that contains the status 3 and update the remote DBImport instance database
        # Also dlete the record from the copyASyncStatus table
        # ------------------------------------------
        try:
            session = self.getDBImportSession()
            aSyncRow = pd.DataFrame(
                session.query(
                    copyASyncStatus.table_id,
                    copyASyncStatus.hive_db,
                    copyASyncStatus.hive_table,
                    copyASyncStatus.destination,
                    copyASyncStatus.failures,
                    copyASyncStatus.hdfs_source_path,
                    copyASyncStatus.hdfs_target_path)
                .select_from(copyASyncStatus)
                .filter(copyASyncStatus.copy_status == 3)
                .all())
            session.close()
        except SQLAlchemyError as e:
            log.error(str(e.__dict__['orig']))
            session.rollback()
            self.disconnectDBImportDB()
        except SQLerror:
            self.disconnectDBImportDB()
        else:
            for index, row in aSyncRow.iterrows():
                tableID = row['table_id']
                destination = row['destination']
                hiveDB = row['hive_db']
                hiveTable = row['hive_table']
                failures = row['failures']
                HDFSsourcePath = row['hdfs_source_path']
                HDFStargetPath = row['hdfs_target_path']

                # Get the remote sessions. if sessions is not available, we just continue to the next item in the database
                _remoteSession = self.getDBImportRemoteSession(destination)
                if _remoteSession == None:
                    continue

                try:
                    remoteSession = _remoteSession()

                    # Get the table_id from the table at the remote instance
                    remoteImportTableID = (remoteSession.query(importTables.table_id)
                        .select_from(importTables)
                        .filter(importTables.hive_db == hiveDB)
                        .filter(importTables.hive_table == hiveTable)
                        .one())
                    remoteTableID = remoteImportTableID[0]

                    updateDict = {}
                    updateDict["copy_finished"] = str(
                        datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f'))

                    # Update the values in import_table on the remote instance
                    (remoteSession.query(configSchema.importTables)
                        .filter(configSchema.importTables.table_id == remoteTableID)
                        .update(updateDict))
                    remoteSession.commit()
                    remoteSession.close()

                except SQLAlchemyError as e:
                    log.error(str(e.__dict__['orig']))
                    remoteSession.rollback()
                    self.disconnectRemoteSession(destination)
                else:
                    # Delete the record from copyASyncStatus
                    try:
                        session = self.getDBImportSession()
                        (session.query(configSchema.copyASyncStatus)
                            .filter(configSchema.copyASyncStatus.table_id == tableID)
                            .filter(configSchema.copyASyncStatus.destination == destination)
                            .delete())
                        session.commit()
                        session.close()
                    except SQLAlchemyError as e:
                        log.error(str(e.__dict__['orig']))
                        session.rollback()
                        self.disconnectDBImportDB()
                    except SQLerror:
                        self.disconnectDBImportDB()
                    else:
                        log.info(
                            "Table %s.%s copied successfully to '%s'"
                            % (hiveDB, hiveTable, destination))

            session.close()

        # log.info("Starting wait")
        time.sleep(1)

    # NOTE(review): unreachable — the loop above is 'while True' with no
    # break; shutdown presumably happens via signal/exit elsewhere. Verify.
    log.info("Server stopped")
    log.debug("Executing daemon.serverDaemon.run() - Finished")
def __init__(self):
    """Create the shared DBImport configuration helper used by this class."""
    self.common_config = common_config.config()
def run(self):
    """Atlas discovery dispatcher loop.

    Starts a pool of atlasCrawler worker threads, then repeatedly reads
    discovery-enabled JDBC connections from the config database, feeds
    them to the crawler queue, and processes crawler results (updating
    atlas_last_discovery on success, blacklisting on failure). Runs
    until threadStopEvent is set, or not at all if atlas_threads is 0.
    """
    logger = "atlasDiscovery"
    log = logging.getLogger(logger)

    self.mysql_conn = None
    self.mysql_cursor = None
    self.configDBSession = None
    self.configDBEngine = None
    self.debugLogLevel = False
    atlasEnabled = True

    # self.atlasOperation = atlas_operations.atlasOperation(logger)

    if logging.root.level == 10:  # DEBUG
        self.debugLogLevel = True

    # Queues shared with the crawler threads, plus a mutex serializing
    # JDBC connection usage across them
    self.atlasCrawlerProcessQueue = Queue()
    self.atlasCrawlerResultQueue = Queue()
    self.jdbcConnectionMutex = threading.Lock()

    # Fetch configuration about MySQL database and how to connect to it
    self.configHostname = configuration.get("Database", "mysql_hostname")
    self.configPort = configuration.get("Database", "mysql_port")
    self.configDatabase = configuration.get("Database", "mysql_database")
    self.configUsername = configuration.get("Database", "mysql_username")
    self.configPassword = configuration.get("Database", "mysql_password")

    atlasCrawlerObjects = []
    atlasCrawlerThreads = int(configuration.get("Server", "atlas_threads"))

    if atlasCrawlerThreads == 0:
        log.info(
            "Atlas discovery disabled as the number of threads is set to 0"
        )
        atlasEnabled = False
    else:
        log.info("Starting %s Atlas crawler threads" % (atlasCrawlerThreads))

        for threadID in range(0, atlasCrawlerThreads):
            # if distCP_separate_logs == False:
            atlasCrawlerLogName = "atlasCrawler-thread%s" % (str(threadID))
            # else:
            #     distCPlogName = "distCP-thread%s"%(str(threadID))

            thread = atlasCrawler(
                name=str(threadID),
                atlasCrawlerProcessQueue=self.atlasCrawlerProcessQueue,
                atlasCrawlerResultQueue=self.atlasCrawlerResultQueue,
                threadStopEvent=self.threadStopEvent,
                loggerName=atlasCrawlerLogName,
                mutex=self.jdbcConnectionMutex)
            thread.daemon = True
            thread.start()
            atlasCrawlerObjects.append(thread)

    self.common_config = common_config.config()
    jdbcConnections = aliased(configSchema.jdbcConnections)
    self.failureLog = {}
    # Aliases currently being processed by a crawler thread; prevents
    # the same connection from being queued twice
    self.connectionsSentToCrawlers = []

    # The interval between the scans. This is in hours
    atlasDiscoveryInterval = self.common_config.getConfigValue(
        key="atlas_discovery_interval")

    # if atlasEnabled == True:
    #     atlasEnabled = self.atlasOperation.checkAtlasSchema(logger=logger)

    if atlasEnabled == True:
        log.info("atlasDiscovery started")
        log.info("Atlas discovery interval is set to %s hours"
                 % (atlasDiscoveryInterval))

    while not self.threadStopEvent.isSet() and atlasEnabled == True:
        # ****************************************************************
        # Read data from jdbc_connection and put in queue for processing
        # ****************************************************************
        if self.atlasCrawlerProcessQueue.qsize() < atlasCrawlerThreads:
            # Only read the database if there isn't enough items in the queue to the crawlers to processes. This will save
            # a large number of sql requests if the queue is full
            try:
                # Read a list of connection aliases that we are going to process in this iteration
                session = self.getDBImportSession()
                atlasDiscoveryCheckTime = datetime.utcnow() - timedelta(
                    hours=atlasDiscoveryInterval)

                # TODO: probably only run this query if jdbcConnectionsDf is empty from the previous iteration
                jdbcConnectionsDf = pd.DataFrame(
                    session.query(
                        jdbcConnections.dbalias,
                        jdbcConnections.atlas_last_discovery,
                        jdbcConnections.atlas_discovery,
                        jdbcConnections.contact_info,
                        jdbcConnections.description,
                        jdbcConnections.owner,
                        jdbcConnections.atlas_include_filter,
                        jdbcConnections.atlas_exclude_filter)
                    .select_from(jdbcConnections)
                    .filter(jdbcConnections.atlas_discovery == 1)
                    # .filter((jdbcConnections.atlas_last_discovery < atlasDiscoveryCheckTime) | (jdbcConnections.atlas_last_discovery == None))
                    .order_by(jdbcConnections.atlas_last_discovery)
                    .all())
                session.close()
            except SQLAlchemyError as e:
                log.error(str(e.__dict__['orig']))
                session.rollback()
                self.disconnectDBImportDB()
            else:
                for index, row in jdbcConnectionsDf.iterrows():
                    dbAlias = row['dbalias']

                    # TODO: move this check into the crawler thread
                    if self.common_config.checkTimeWindow(
                            dbAlias, atlasDiscoveryMode=True) == False:
                        continue

                    # Find out if the dbAlias is blacklisted
                    if self.isConnectionBlacklisted(dbAlias) == True:
                        continue

                    if dbAlias in self.connectionsSentToCrawlers:
                        # log.warning("This connection is already being processed. Skipping....")
                        continue

                    altasOperationFailed = False
                    printBlackListWarning = True

                    self.common_config.mysql_conn.commit()
                    try:
                        self.common_config.lookupConnectionAlias(dbAlias)
                    except invalidConfiguration as err:
                        if self.common_config.atlasJdbcSourceSupport == True:
                            log.error(
                                "Connection '%s' have invalid configuration. Failed with '%s'"
                                % (dbAlias, err))
                            altasOperationFailed = True

                    if self.common_config.atlasJdbcSourceSupport == False:
                        # This source type does not support Atlas discovery
                        log.debug(
                            "Connection '%s' does not support Atlas discovery. Skipping..."
                            % (dbAlias))
                        altasOperationFailed = True
                        printBlackListWarning = False

                    # Start the Jpype JVM as that needs to be running before the crawlers starts to use it
                    if jpype.isJVMStarted() == False:
                        log.debug("Starting jpype JVM")
                        self.common_config.connectToJDBC(
                            allJarFiles=True,
                            exitIfFailure=False,
                            logger=logger,
                            printError=False)
                        self.common_config.disconnectFromJDBC()

                    # if altasOperationFailed == False and self.common_config.connectToJDBC(allJarFiles=True, exitIfFailure=False, logger=logger) == True:
                    if altasOperationFailed == False:
                        # self.common_config.atlasEnabled = True
                        self.connectionsSentToCrawlers.append(dbAlias)
                        log.debug("Sending alias '%s' to queue" % (dbAlias))

                        # Build the work item for the crawler thread from the
                        # connection row plus the looked-up JDBC details
                        atlasCrawlerRequest = {}
                        atlasCrawlerRequest["dbAlias"] = row['dbalias']
                        atlasCrawlerRequest["contactInfo"] = row['contact_info']
                        atlasCrawlerRequest["description"] = row['description']
                        atlasCrawlerRequest["owner"] = row['owner']
                        atlasCrawlerRequest["atlasIncludeFilter"] = row['atlas_include_filter']
                        atlasCrawlerRequest["atlasExcludeFilter"] = row['atlas_exclude_filter']
                        atlasCrawlerRequest["jdbc_hostname"] = self.common_config.jdbc_hostname
                        atlasCrawlerRequest["jdbc_port"] = self.common_config.jdbc_port
                        atlasCrawlerRequest["jdbc_servertype"] = self.common_config.jdbc_servertype
                        atlasCrawlerRequest["jdbc_database"] = self.common_config.jdbc_database
                        atlasCrawlerRequest["jdbc_oracle_sid"] = self.common_config.jdbc_oracle_sid
                        atlasCrawlerRequest["jdbc_oracle_servicename"] = self.common_config.jdbc_oracle_servicename
                        atlasCrawlerRequest["jdbc_username"] = self.common_config.jdbc_username
                        atlasCrawlerRequest["jdbc_password"] = self.common_config.jdbc_password
                        atlasCrawlerRequest["jdbc_driver"] = self.common_config.jdbc_driver
                        atlasCrawlerRequest["jdbc_url"] = self.common_config.jdbc_url
                        atlasCrawlerRequest["jdbc_classpath_for_python"] = self.common_config.jdbc_classpath_for_python
                        atlasCrawlerRequest["jdbc_environment"] = self.common_config.jdbc_environment
                        atlasCrawlerRequest["hdfs_address"] = None
                        atlasCrawlerRequest["cluster_name"] = None

                        self.atlasCrawlerProcessQueue.put(atlasCrawlerRequest)

                    else:
                        # altasOperationFailed = True
                        # if altasOperationFailed == True:
                        self.blacklistConnection(dbAlias, printBlackListWarning)

        # ********************************
        # Read response from atlasCrawler
        # ********************************
        try:
            atlasCrawlerResult = self.atlasCrawlerResultQueue.get(
                block=False, timeout=1)
        except Empty:
            atlasCrawlerResult = None

        if atlasCrawlerResult is not None:
            dbAlias = atlasCrawlerResult.get('dbAlias')
            result = atlasCrawlerResult.get('result')
            blacklist = atlasCrawlerResult.get('blacklist')
            log.debug("atlasCrawlerResultQueue: %s" % (atlasCrawlerResult))
            self.connectionsSentToCrawlers.remove(dbAlias)

            if result == True:
                # Crawl succeeded: record the discovery timestamp
                updateDict = {}
                updateDict["atlas_last_discovery"] = str(
                    datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f'))

                try:
                    session = self.getDBImportSession()
                    (session.query(configSchema.jdbcConnections)
                        .filter(configSchema.jdbcConnections.dbalias == dbAlias)
                        .update(updateDict))
                    session.commit()
                    session.close()
                except SQLAlchemyError as e:
                    log.error(str(e.__dict__['orig']))
                    session.rollback()
                    self.disconnectDBImportDB()
                else:
                    self.removeBlacklist(dbAlias)
            else:
                if blacklist == True:
                    log.error(
                        "Connection '%s' failed during crawling of database schema"
                        % (dbAlias))
                    self.blacklistConnection(dbAlias)
                else:
                    # Soft failure: leave atlas_last_discovery untouched so
                    # the connection is retried on a later iteration
                    log.warning(
                        "A Warning was detected when crawling connection '%s'. It will not be marked as completed and will retry the operation"
                        % (dbAlias))

        time.sleep(1)

    self.disconnectDBImportDB()

    if atlasEnabled == True:
        log.info("atlasDiscovery stopped")
def run(self):
    """Atlas discovery loop (inline variant, no crawler threads).

    Repeatedly reads discovery-enabled JDBC connections whose last
    discovery is older than the configured interval, checks their time
    window and blacklist status, and runs the discovery directly via
    common_config. Failed connections are blacklisted with exponential
    back-off (1h doubling, capped at 24h). Runs until threadStopEvent
    is set, or not at all if the Atlas schema check fails.
    """
    logger = "atlasDiscovery"
    log = logging.getLogger(logger)
    # log.info("atlasDiscovery started")

    self.mysql_conn = None
    self.mysql_cursor = None
    self.configDBSession = None
    self.configDBEngine = None
    self.debugLogLevel = False

    if logging.root.level == 10:  # DEBUG
        self.debugLogLevel = True

    # Fetch configuration about MySQL database and how to connect to it
    self.configHostname = configuration.get("Database", "mysql_hostname")
    self.configPort = configuration.get("Database", "mysql_port")
    self.configDatabase = configuration.get("Database", "mysql_database")
    self.configUsername = configuration.get("Database", "mysql_username")
    self.configPassword = configuration.get("Database", "mysql_password")

    self.common_config = common_config.config()
    jdbcConnections = aliased(configSchema.jdbcConnections)
    # Per-alias failure records: {'blackListTime': hours, 'blackListStart': datetime}
    failureLog = {}

    # The interval between the scans. This is in hours
    atlasDiscoveryInterval = self.common_config.getConfigValue(
        key="atlas_discovery_interval")

    atlasEnabled = self.common_config.checkAtlasSchema(logger=logger)
    if atlasEnabled == True:
        log.info("atlasDiscovery started")
        log.info("Atlas discovery interval is set to %s hours"
                 % (atlasDiscoveryInterval))

    while not self.threadStopEvent.isSet() and atlasEnabled == True:
        try:
            # Connections due for discovery: atlas_discovery enabled and
            # last discovery older than the interval (or never discovered)
            session = self.getDBImportSession()
            atlasDiscoveryCheckTime = datetime.utcnow() - timedelta(
                hours=atlasDiscoveryInterval)

            jdbcConnectionsDf = pd.DataFrame(
                session.query(
                    jdbcConnections.dbalias,
                    jdbcConnections.timewindow_start,
                    jdbcConnections.timewindow_stop,
                    jdbcConnections.atlas_last_discovery,
                    jdbcConnections.atlas_discovery)
                .select_from(jdbcConnections)
                .filter(jdbcConnections.atlas_discovery == 1)
                .filter((jdbcConnections.atlas_last_discovery < atlasDiscoveryCheckTime) |
                        (jdbcConnections.atlas_last_discovery == None))
                .order_by(jdbcConnections.atlas_last_discovery)
                .all())
            session.close()
        except SQLAlchemyError as e:
            log.error(str(e.__dict__['orig']))
            session.rollback()
            self.disconnectDBImportDB()
        else:
            for index, row in jdbcConnectionsDf.iterrows():
                dbAlias = row['dbalias']
                currentTime = str(datetime.now().strftime('%H:%M:%S'))
                timeWindowStart = None
                timeWindowStop = None
                dbAliasAllowedAtThisTime = False

                if row['timewindow_start'] != None:
                    timeWindowStart = str(row['timewindow_start'])
                if row['timewindow_stop'] != None:
                    timeWindowStop = str(row['timewindow_stop'])

                # Zero-pad single-digit hours so lexical comparison against
                # the HH:MM:SS current time works correctly
                if timeWindowStart != None and re.search('^[0-9]:', timeWindowStart):
                    timeWindowStart = "0" + timeWindowStart
                if timeWindowStop != None and re.search('^[0-9]:', timeWindowStop):
                    timeWindowStop = "0" + timeWindowStop

                # No window configured means always allowed; otherwise the
                # current time must fall strictly inside the window
                if timeWindowStart == None and timeWindowStop == None:
                    dbAliasAllowedAtThisTime = True
                elif currentTime > timeWindowStart and currentTime < timeWindowStop:
                    dbAliasAllowedAtThisTime = True

                # Find out if the dbAlias is blacklisted
                if failureLog.get(dbAlias, None) != None:
                    blackListEnableTime = failureLog[dbAlias]['blackListStart'] + timedelta(
                        hours=failureLog[dbAlias]['blackListTime'])
                    if datetime.now() < blackListEnableTime:
                        # This dbAlias is still blacklisted
                        continue

                if dbAliasAllowedAtThisTime == False:
                    # Not allowed to access this connection at this time
                    continue

                self.common_config.mysql_conn.commit()
                self.common_config.lookupConnectionAlias(dbAlias)
                if self.common_config.atlasJdbcSourceSupport == False:
                    # This source type does not support Atlas discovery
                    continue

                # We now have a valid connection in dbAlias that we can do a discovery on
                log.info("Starting a Atlas discovery on connection '%s'"
                         % (dbAlias))
                altasOperationFailed = False

                if self.common_config.connectToJDBC(allJarFiles=True,
                                                    exitIfFailure=False,
                                                    logger=logger) == True:
                    self.common_config.atlasEnabled = True
                    response = self.common_config.discoverAtlasRdbms(
                        dbAlias=dbAlias, logger=logger)
                    if response == False:
                        # Something went wrong when getting source system schema
                        altasOperationFailed = True
                        log.warning(
                            "There was an error/warning when discovering source schema"
                        )
                    else:
                        log.info(
                            "Finished Atlas discovery on connection '%s'"
                            % (dbAlias))

                    self.common_config.disconnectFromJDBC()

                    if altasOperationFailed == False:
                        updateDict = {}
                        updateDict["atlas_last_discovery"] = str(
                            datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f'))
                        try:
                            session = self.getDBImportSession()
                            (session.query(configSchema.jdbcConnections)
                                .filter(configSchema.jdbcConnections.dbalias == dbAlias)
                                .update(updateDict))
                            session.commit()
                            session.close()
                        except SQLAlchemyError as e:
                            log.error(str(e.__dict__['orig']))
                            session.rollback()
                            self.disconnectDBImportDB()
                        else:
                            # Success: clear any blacklist entry and process
                            # only one connection per loop iteration
                            failureLog.pop(dbAlias, None)
                            break
                else:
                    altasOperationFailed = True

                if altasOperationFailed == True:
                    # Connection failed. We need to blacklist this connection for some time
                    blackListData = failureLog.get(dbAlias, None)
                    if blackListData == None:
                        blackListTime = 1
                    else:
                        blackListTime = failureLog[dbAlias]['blackListTime'] * 2

                    # Max blacklist time is 24 hours
                    if blackListTime > 24:
                        blackListTime = 24

                    failureLog[dbAlias] = {
                        'blackListTime': blackListTime,
                        'blackListStart': datetime.now()
                    }

                    log.warning(
                        "Atlas Discovery failed on connection '%s'"
                        % (dbAlias))
                    log.warning(
                        "This connection is now blacklisted for %s hours"
                        % (failureLog[dbAlias]['blackListTime']))

        time.sleep(1)

    self.disconnectDBImportDB()

    if atlasEnabled == True:
        log.info("atlasDiscovery stopped")