Esempio n. 1
0
    def __init__(self, threadName, workerConfig):
        WorkerThread.__init__(self, threadName, workerConfig)
        
        self.logger = Logger.getLogger("ElasticSearch.EsLogger")
        # self.logger.setLevel(Level.DEBUG)

        self.workerConfig = workerConfig

        # Default Bulk Request Settings for ES Logging.
        # - Set to True to use bulk requests for logs.
        self.useBulkReq = False
        self.bulkReqCounter = 0
        self.bulkReqExecCountTrigger = 1000
        self.lastBulkReqFlush = datetime.now()

        if (threadName is None):
            # ==== 1st Instance (threadName is None) ====
            # Get the EsLogger queue.
            # This object will feed the queue through this reference.
            self.wq = workerConfig.wq
            self.esNode = self.workerConfig.esNode

        else:
            # ==== 2nd Instance (threadName is not None) ====
            self.esNode = self.workerConfig.esNode
            self.esClient = self.esNode.getClient()
            self.esBulkReq = EsBulkReq(self.esClient, None)

            self.indexName = workerConfig.indexName
            
            # If bulkReq config are set in the workerConfig object, use them.
            if workerConfig.useBulkReq is not None:
                self.useBulkReq = workerConfig.useBulkReq
            if workerConfig.bulkReqExecCountTrigger is not None:
                self.bulkReqExecCountTrigger = workerConfig.bulkReqExecCountTrigger

        # Json SerDe objects
        self.boon = BoonJson()

        self.esLoggerWorker = None
        self.esLoggerThread = None
        self.stopThread = False
        self.threaded = False

        self.dtfmt = SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSZ")

        if threadName is None:
            self.threadName = "EsLoggerController"
            # Startup the Background thread.
            self.startEsLoggerThread()
        else:
            self.threadName = threadName
Esempio n. 2
0
class EsLogger(WorkerThread):
    def __init__(self, threadName, workerConfig):
        WorkerThread.__init__(self, threadName, workerConfig)
        
        self.logger = Logger.getLogger("ElasticSearch.EsLogger")
        # self.logger.setLevel(Level.DEBUG)

        self.workerConfig = workerConfig

        # Default Bulk Request Settings for ES Logging.
        # - Set to True to use bulk requests for logs.
        self.useBulkReq = False
        self.bulkReqCounter = 0
        self.bulkReqExecCountTrigger = 1000
        self.lastBulkReqFlush = datetime.now()

        if (threadName is None):
            # ==== 1st Instance (threadName is None) ====
            # Get the EsLogger queue.
            # This object will feed the queue through this reference.
            self.wq = workerConfig.wq
            self.esNode = self.workerConfig.esNode

        else:
            # ==== 2nd Instance (threadName is not None) ====
            self.esNode = self.workerConfig.esNode
            self.esClient = self.esNode.getClient()
            self.esBulkReq = EsBulkReq(self.esClient, None)

            self.indexName = workerConfig.indexName
            
            # If bulkReq config are set in the workerConfig object, use them.
            if workerConfig.useBulkReq is not None:
                self.useBulkReq = workerConfig.useBulkReq
            if workerConfig.bulkReqExecCountTrigger is not None:
                self.bulkReqExecCountTrigger = workerConfig.bulkReqExecCountTrigger

        # Json SerDe objects
        self.boon = BoonJson()

        self.esLoggerWorker = None
        self.esLoggerThread = None
        self.stopThread = False
        self.threaded = False

        self.dtfmt = SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSZ")

        if threadName is None:
            self.threadName = "EsLoggerController"
            # Startup the Background thread.
            self.startEsLoggerThread()
        else:
            self.threadName = threadName

    def startEsLoggerThread(self):
 
        if (     (self.esLoggerWorker is None)
             and (self.esLoggerThread is None) ):
            # Create the workers config object.
            # Place the esLogger worker queue into the config object.
            # Place the index name into the config object.
            wconfig = EsLoggerConfig(self.esNode, self.workerConfig.esHosts,
                                     self.workerConfig.indexName, self.wq)
            
            wconfig.useBulkReq = self.workerConfig.useBulkReq
            wconfig.bulkReqExecCountTrigger = self.workerConfig.bulkReqExecCountTrigger

            if self.workerConfig.esNode is not None:
                esNode = self.workerConfig.esNode
            else:
                esNode = self.esNode

            if esNode == 0:
                self.logger.error("An esNode is required to create the EsLogger WorkerThread.")
                sys.exit(0)

            wconfig.esNode = esNode
            wconfig.esHosts = self.workerConfig.esHosts

            # Create the esLogger object that will run on a thread.
            # Give the object a thread name and the workerConfig object.
            # This esLogger object will run on a thread and feed off of the
            # worker queue in the worker config object.
            self.esLoggerWorker = EsLogger("EsLoggerThread", wconfig)
            # Create the thread.
            self.esLoggerThread = Thread(name=self.esLoggerWorker.threadName,
                                         target=self.esLoggerWorker.start, args=())
            # Start the thread.
            self.esLoggerThread.start()
        else:
            self.logger.warn("The EsLoggerThread is already running!")

    
    def stopEsLoggerThread(self):
        if self.esLoggerWorker.useBulkReq:
            self.logger.debug("Flush Bulk Inserts...")
            self.esLoggerWorker.flushBulkInserts()

        # Give 7 seconds to flush any pending inserts.
        self.logger.debug("-- Sleep 2...")
        pytime.sleep(7)

        if self.esLoggerWorker:
            self.logger.debug("Stop Thread...")
            self.esLoggerWorker.stopThread = True

            # Allow 2 seconds for thread to die
            self.logger.debug("-- Sleep 2...")
            pytime.sleep(2)

            # Use interrupts to stop threads.
            # Interrupt is needed because the thread may be waiting on queue queue.
            self.logger.debug("Interrupt Thread...")
            tries = 5
            while self.esLoggerWorker.isAlive() and tries > 0:
                self.esLoggerWorker.interrupt()
                pytime.sleep(.002)
                tries = tries - 1


    def stop(self):
        self.stopEsLoggerThread()

    def run(self):
        self.threaded = True
        self.wq = self.workerConfig.wq

        while not self.stopThread:
            # Read from workerQueue.
            wrk = self.dequeueWork()

            if wrk is not None:
                # Log to ES
                logLevel = wrk.get('logLevel')
                self._logToEs(logLevel, wrk)
            else:
                # Flush bulk inserts if there are any in the que,
                # Every 5 seconds.
                unFlushedSecs = (datetime.now() - self.lastBulkReqFlush).seconds
                if (    (unFlushedSecs >= 5)
                    and (self.bulkReqCounter > 0)):
                    self.logger.debug("...EsLogs Bulk Inserts... Flushing after 5 seconds...")
                    self.flushBulkInserts()

    # Place work into the work queue.
    def enqueueWork(self, workObj):
        method = "enqueueWork"
        self.logger.debug("[{}.{}] Place workObj into workerQueue".format(self.threadName,
                                                                          method) )
        self.wq.put(workObj)


    # Pulls a work item from the queue.
    def dequeueWork(self):
        method = "dequeueWork"
        # True: blocks if there are no items in the queue.
        #    3: blocking on the queue releases after 1 seconds.
        try:
            wrk = None
            wrk = self.wq.get(True,1)
        except BaseException as ex:
            # When we hit a BaseException while waiting on workQue, it is prob a timeout error on and empty queue.
            if ('Empty' not in type(ex).__name__):
                self.logger.error("BaseException in [{}.{}] - [{}: {}]".format(self.threadName, method, type(ex), ex))
            if wrk is None:
                self.logger.debug("--> Getting wrk, Queue.get() - timed out.")
        except JException as ex:
            self.logger.info("JException in [{}.{}] - [{}]".format(self.threadName, method, ex))
            # If we get an interrupt which waiting for work, we need to stop the thread.
            self.stopThread = True
        return wrk


    def logToEs(self, logLevel, jMap):

        jMap.put('logDateTime', self.dtfmt.format(datetime.now()))
        jMap.put('logLevel', logLevel)

        # Push data into queue and return
        self.enqueueWork(jMap)

        
    # Accepts a Java Map object which we can serialize a json string via Boon.
    # Then use the json string to store to ES.
    def _logToEs(self, logLevel, jMap):
        # *** Interactive Debug seems to mess with multi-threading
        # pdb.set_trace()

        # Convert the Java Map to a JSON string
        jsonEvent = self.boon.serToJson(jMap)

        try:
            # Get the date from the logDateTime
            edt = jMap.get('logDateTime')
            # Pull the data from the TimeStamp 
            pat = r'^(\d+)\-(\d+)\-(\d+)T.*' 
            match = re.search(pat, edt) 
            if match: 
                edt = match.group(1) +  match.group(2) +  match.group(3) 
            else:
                edt = 'unknown'

            # Add the date to the index name.
            indexName = self.indexName + '-' + edt

            # Prep to Index document.
            self.logger.debug("\n==index==> index[{}] logLevel[{}]\n".format(indexName,logLevel))
            i1 = self.esClient.prepareIndex(indexName, logLevel, None)
            i2 = i1.setSource(jsonEvent)

            self.doEsInsert(i2, indexName, '', jsonEvent)

        except JException as ex:
            # ex: g.elasticsearch.index.mapper.MapperParsingException:...
            #     failed to parse [Data.Return.Value.Death.DateNormalized]
            # Change the name of the bad field to s_<fieldName> in the jsonStr and reinsert the object.
            # - Call on self.indexEvent(newJson) recursively as there may be multiple failures.
            self.logger.error("EsLogger Exception Caught [{}] [{}]".format(ex, jsonEvent))

    def doEsInsert(self, i2, indexName, typeMap, jsonEvent):
        #=================================================================================
        # Single index request operation...
        # Submits one index operation at a time.
        #=================================================================================
        if not self.useBulkReq:
            self.logger.debug("\n==single==> {}\n".format(jsonEvent))
            # Execution a single index request.
            indxResp = i2.execute().actionGet()
        #=================================================================================

        #=================================================================================
        # Bulk Index Operation...
        # Keep adding to the bulk index request until we reach N index operations,
        # then submit the request.
        #=================================================================================
        else:
            self.logger.debug("\n==bulk==> {}\n".format(jsonEvent))
            self.esBulkReq.add(i2, indexName, typeMap, jsonEvent)
            self.bulkReqCounter = self.bulkReqCounter + 1
            if self.bulkReqCounter >= self.bulkReqExecCountTrigger:
                self.esBulkReq.execute()
                self.bulkReqCounter = 0
                self.lastBulkReqFlush = datetime.now()
        #=================================================================================

    # If Bulk Request mode is in use, one should call on this function to flush
    # any pending bulk requests at the end of bulk operation cycles.
    def flushBulkInserts(self):
        if self.useBulkReq and (self.bulkReqCounter > 0):
            self.logger.debug("\n==> BulkReq Flush!\n")
            self.esBulkReq.execute()
            self.lastBulkReqFlush = datetime.now()


    def close(self):
        self.stopEsLoggerThread()