def __init__(self, threadName, workerConfig): WorkerThread.__init__(self, threadName, workerConfig) self.logger = Logger.getLogger("ElasticSearch.EsLogger") # self.logger.setLevel(Level.DEBUG) self.workerConfig = workerConfig # Default Bulk Request Settings for ES Logging. # - Set to True to use bulk requests for logs. self.useBulkReq = False self.bulkReqCounter = 0 self.bulkReqExecCountTrigger = 1000 self.lastBulkReqFlush = datetime.now() if (threadName is None): # ==== 1st Instance (threadName is None) ==== # Get the EsLogger queue. # This object will feed the queue through this reference. self.wq = workerConfig.wq self.esNode = self.workerConfig.esNode else: # ==== 2nd Instance (threadName is not None) ==== self.esNode = self.workerConfig.esNode self.esClient = self.esNode.getClient() self.esBulkReq = EsBulkReq(self.esClient, None) self.indexName = workerConfig.indexName # If bulkReq config are set in the workerConfig object, use them. if workerConfig.useBulkReq is not None: self.useBulkReq = workerConfig.useBulkReq if workerConfig.bulkReqExecCountTrigger is not None: self.bulkReqExecCountTrigger = workerConfig.bulkReqExecCountTrigger # Json SerDe objects self.boon = BoonJson() self.esLoggerWorker = None self.esLoggerThread = None self.stopThread = False self.threaded = False self.dtfmt = SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSZ") if threadName is None: self.threadName = "EsLoggerController" # Startup the Background thread. self.startEsLoggerThread() else: self.threadName = threadName
class EsLogger(WorkerThread): def __init__(self, threadName, workerConfig): WorkerThread.__init__(self, threadName, workerConfig) self.logger = Logger.getLogger("ElasticSearch.EsLogger") # self.logger.setLevel(Level.DEBUG) self.workerConfig = workerConfig # Default Bulk Request Settings for ES Logging. # - Set to True to use bulk requests for logs. self.useBulkReq = False self.bulkReqCounter = 0 self.bulkReqExecCountTrigger = 1000 self.lastBulkReqFlush = datetime.now() if (threadName is None): # ==== 1st Instance (threadName is None) ==== # Get the EsLogger queue. # This object will feed the queue through this reference. self.wq = workerConfig.wq self.esNode = self.workerConfig.esNode else: # ==== 2nd Instance (threadName is not None) ==== self.esNode = self.workerConfig.esNode self.esClient = self.esNode.getClient() self.esBulkReq = EsBulkReq(self.esClient, None) self.indexName = workerConfig.indexName # If bulkReq config are set in the workerConfig object, use them. if workerConfig.useBulkReq is not None: self.useBulkReq = workerConfig.useBulkReq if workerConfig.bulkReqExecCountTrigger is not None: self.bulkReqExecCountTrigger = workerConfig.bulkReqExecCountTrigger # Json SerDe objects self.boon = BoonJson() self.esLoggerWorker = None self.esLoggerThread = None self.stopThread = False self.threaded = False self.dtfmt = SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSZ") if threadName is None: self.threadName = "EsLoggerController" # Startup the Background thread. self.startEsLoggerThread() else: self.threadName = threadName def startEsLoggerThread(self): if ( (self.esLoggerWorker is None) and (self.esLoggerThread is None) ): # Create the workers config object. # Place the esLogger worker queue into the config object. # Place the index name into the config object. wconfig = EsLoggerConfig(self.esNode, self.workerConfig.esHosts, self.workerConfig.indexName, self.wq) wconfig.useBulkReq = self.workerConfig.useBulkReq wconfig.bulkReqExecCountTrigger = self.workerConfig.bulkReqExecCountTrigger if self.workerConfig.esNode is not None: esNode = self.workerConfig.esNode else: esNode = self.esNode if esNode == 0: self.logger.error("An esNode is required to create the EsLogger WorkerThread.") sys.exit(0) wconfig.esNode = esNode wconfig.esHosts = self.workerConfig.esHosts # Create the esLogger object that will run on a thread. # Give the object a thread name and the workerConfig object. # This esLogger object will run on a thread and feed off of the # worker queue in the worker config object. self.esLoggerWorker = EsLogger("EsLoggerThread", wconfig) # Create the thread. self.esLoggerThread = Thread(name=self.esLoggerWorker.threadName, target=self.esLoggerWorker.start, args=()) # Start the thread. self.esLoggerThread.start() else: self.logger.warn("The EsLoggerThread is already running!") def stopEsLoggerThread(self): if self.esLoggerWorker.useBulkReq: self.logger.debug("Flush Bulk Inserts...") self.esLoggerWorker.flushBulkInserts() # Give 7 seconds to flush any pending inserts. self.logger.debug("-- Sleep 2...") pytime.sleep(7) if self.esLoggerWorker: self.logger.debug("Stop Thread...") self.esLoggerWorker.stopThread = True # Allow 2 seconds for thread to die self.logger.debug("-- Sleep 2...") pytime.sleep(2) # Use interrupts to stop threads. # Interrupt is needed because the thread may be waiting on queue queue. self.logger.debug("Interrupt Thread...") tries = 5 while self.esLoggerWorker.isAlive() and tries > 0: self.esLoggerWorker.interrupt() pytime.sleep(.002) tries = tries - 1 def stop(self): self.stopEsLoggerThread() def run(self): self.threaded = True self.wq = self.workerConfig.wq while not self.stopThread: # Read from workerQueue. wrk = self.dequeueWork() if wrk is not None: # Log to ES logLevel = wrk.get('logLevel') self._logToEs(logLevel, wrk) else: # Flush bulk inserts if there are any in the que, # Every 5 seconds. unFlushedSecs = (datetime.now() - self.lastBulkReqFlush).seconds if ( (unFlushedSecs >= 5) and (self.bulkReqCounter > 0)): self.logger.debug("...EsLogs Bulk Inserts... Flushing after 5 seconds...") self.flushBulkInserts() # Place work into the work queue. def enqueueWork(self, workObj): method = "enqueueWork" self.logger.debug("[{}.{}] Place workObj into workerQueue".format(self.threadName, method) ) self.wq.put(workObj) # Pulls a work item from the queue. def dequeueWork(self): method = "dequeueWork" # True: blocks if there are no items in the queue. # 3: blocking on the queue releases after 1 seconds. try: wrk = None wrk = self.wq.get(True,1) except BaseException as ex: # When we hit a BaseException while waiting on workQue, it is prob a timeout error on and empty queue. if ('Empty' not in type(ex).__name__): self.logger.error("BaseException in [{}.{}] - [{}: {}]".format(self.threadName, method, type(ex), ex)) if wrk is None: self.logger.debug("--> Getting wrk, Queue.get() - timed out.") except JException as ex: self.logger.info("JException in [{}.{}] - [{}]".format(self.threadName, method, ex)) # If we get an interrupt which waiting for work, we need to stop the thread. self.stopThread = True return wrk def logToEs(self, logLevel, jMap): jMap.put('logDateTime', self.dtfmt.format(datetime.now())) jMap.put('logLevel', logLevel) # Push data into queue and return self.enqueueWork(jMap) # Accepts a Java Map object which we can serialize a json string via Boon. # Then use the json string to store to ES. def _logToEs(self, logLevel, jMap): # *** Interactive Debug seems to mess with multi-threading # pdb.set_trace() # Convert the Java Map to a JSON string jsonEvent = self.boon.serToJson(jMap) try: # Get the date from the logDateTime edt = jMap.get('logDateTime') # Pull the data from the TimeStamp pat = r'^(\d+)\-(\d+)\-(\d+)T.*' match = re.search(pat, edt) if match: edt = match.group(1) + match.group(2) + match.group(3) else: edt = 'unknown' # Add the date to the index name. indexName = self.indexName + '-' + edt # Prep to Index document. self.logger.debug("\n==index==> index[{}] logLevel[{}]\n".format(indexName,logLevel)) i1 = self.esClient.prepareIndex(indexName, logLevel, None) i2 = i1.setSource(jsonEvent) self.doEsInsert(i2, indexName, '', jsonEvent) except JException as ex: # ex: g.elasticsearch.index.mapper.MapperParsingException:... # failed to parse [Data.Return.Value.Death.DateNormalized] # Change the name of the bad field to s_<fieldName> in the jsonStr and reinsert the object. # - Call on self.indexEvent(newJson) recursively as there may be multiple failures. self.logger.error("EsLogger Exception Caught [{}] [{}]".format(ex, jsonEvent)) def doEsInsert(self, i2, indexName, typeMap, jsonEvent): #================================================================================= # Single index request operation... # Submits one index operation at a time. #================================================================================= if not self.useBulkReq: self.logger.debug("\n==single==> {}\n".format(jsonEvent)) # Execution a single index request. indxResp = i2.execute().actionGet() #================================================================================= #================================================================================= # Bulk Index Operation... # Keep adding to the bulk index request until we reach N index operations, # then submit the request. #================================================================================= else: self.logger.debug("\n==bulk==> {}\n".format(jsonEvent)) self.esBulkReq.add(i2, indexName, typeMap, jsonEvent) self.bulkReqCounter = self.bulkReqCounter + 1 if self.bulkReqCounter >= self.bulkReqExecCountTrigger: self.esBulkReq.execute() self.bulkReqCounter = 0 self.lastBulkReqFlush = datetime.now() #================================================================================= # If Bulk Request mode is in use, one should call on this function to flush # any pending bulk requests at the end of bulk operation cycles. def flushBulkInserts(self): if self.useBulkReq and (self.bulkReqCounter > 0): self.logger.debug("\n==> BulkReq Flush!\n") self.esBulkReq.execute() self.lastBulkReqFlush = datetime.now() def close(self): self.stopEsLoggerThread()