def __init__(self, aminerConfig, propertyPath, binDefinition,
    reportInterval, reportEventHandlers, resetAfterReportFlag=True,
    persistenceId='Default'):
  """Initialize the analysis component.
  @param reportInterval delay in seconds between creation of two
  reports. The parameter is applied to the parsed record data time,
  not the system time. Hence reports can be delayed when no data
  is received."""
  self.lastReportTime = None
  self.nextReportTime = 0.0
  self.propertyPath = propertyPath
  self.binDefinition = binDefinition
  self.histogramData = {}
  self.reportInterval = reportInterval
  self.reportEventHandlers = reportEventHandlers
  self.resetAfterReportFlag = resetAfterReportFlag
  self.persistenceId = persistenceId
  self.nextPersistTime = None

  PersistencyUtil.addPersistableComponent(self)
  self.persistenceFileName = AMinerConfig.buildPersistenceFileName(
      aminerConfig, 'PathDependentHistogramAnalysis', persistenceId)
  persistenceData = PersistencyUtil.loadJson(self.persistenceFileName)
  if persistenceData is not None:
    raise Exception('Loading and merging of persistence data not yet implemented')
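# --- Hedged usage sketch (not part of the original module) ---
# Shows how a PathDependentHistogramAnalysis might be wired into an AMiner
# analysis pipeline. The import path, LinearNumericBinDefinition parameters
# and registerComponent() call follow the AMiner (AECID) API as far as known;
# 'anomalyEventHandlers', the match path and the bin values are illustrative
# assumptions.
#
#   from aminer.analysis.HistogramAnalysis import (
#       LinearNumericBinDefinition, PathDependentHistogramAnalysis)
#
#   binDefinition = LinearNumericBinDefinition(0, 1, 100, True)
#   histogramAnalysis = PathDependentHistogramAnalysis(
#       analysisContext.aminerConfig, '/model/connection/port',
#       binDefinition, 3600, anomalyEventHandlers)
#   analysisContext.registerComponent(histogramAnalysis,
#       componentName='PortHistogram')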
def __init__(self, aminerConfig, anomalyEventHandlers, timestampPath,
    analyzePathList, minBinElements, minBinTime, syncBinsFlag=True,
    debugMode=False, persistenceId='Default'):
  """Initialize the detector. This will also trigger reading
  or creation of persistence storage location.
  @param timestampPath if not None, use this path value for
  timestamp based bins.
  @param analyzePathList list of match paths to analyze in this
  detector.
  @param minBinElements evaluate the latest bin only after at
  least that number of elements was added to it.
  @param minBinTime evaluate the latest bin only when the first
  element is received after minBinTime has elapsed.
  @param syncBinsFlag if true, the bins of all analyzed path values
  have to be filled enough to trigger analysis.
  @param debugMode if true, generate an analysis report even when
  the average of the last bin was within the expected range."""
  self.anomalyEventHandlers = anomalyEventHandlers
  self.timestampPath = timestampPath
  self.minBinElements = minBinElements
  self.minBinTime = minBinTime
  self.syncBinsFlag = syncBinsFlag
  self.debugMode = debugMode
  self.nextPersistTime = None

  PersistencyUtil.addPersistableComponent(self)
  self.persistenceFileName = AMinerConfig.buildPersistenceFileName(
      aminerConfig, 'MatchValueAverageChangeDetector', persistenceId)
  persistenceData = PersistencyUtil.loadJson(self.persistenceFileName)
  if persistenceData is None:
    self.statData = []
    for path in analyzePathList:
      self.statData.append((path, [],))
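# --- Hedged usage sketch (not part of the original module) ---
# Illustrates constructing the detector above: average-change analysis over
# two match paths, with bins of at least 100 elements or 10 seconds, and no
# timestamp path (None). The import path and the example match paths are
# assumptions.
#
#   from aminer.analysis.MatchValueAverageChangeDetector import \
#       MatchValueAverageChangeDetector
#
#   averageChangeDetector = MatchValueAverageChangeDetector(
#       analysisContext.aminerConfig, anomalyEventHandlers, None,
#       ['/model/response/size', '/model/response/delay'], 100, 10)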
def __init__(self, aminerConfig, histogramDefs, reportInterval,
    reportEventHandlers, resetAfterReportFlag=True, persistenceId='Default'):
  """Initialize the analysis component.
  @param histogramDefs a list of tuples, each containing the target
  property path to analyze and the BinDefinition to apply for binning.
  @param reportInterval delay in seconds between creation of two
  reports. The parameter is applied to the parsed record data time,
  not the system time. Hence reports can be delayed when no data
  is received."""
  self.lastReportTime = None
  self.nextReportTime = 0.0
  self.histogramData = []
  for (path, binDefinition) in histogramDefs:
    self.histogramData.append(HistogramData(path, binDefinition))
  self.reportInterval = reportInterval
  self.reportEventHandlers = reportEventHandlers
  self.resetAfterReportFlag = resetAfterReportFlag
  self.persistenceId = persistenceId
  self.nextPersistTime = None

  PersistencyUtil.addPersistableComponent(self)
  self.persistenceFileName = AMinerConfig.buildPersistenceFileName(
      aminerConfig, 'HistogramAnalysis', persistenceId)
  persistenceData = PersistencyUtil.loadJson(self.persistenceFileName)
  if persistenceData is not None:
    raise Exception('Loading and merging of persistence data not yet implemented')
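# --- Hedged usage sketch (not part of the original module) ---
# A histogramDefs example: one hour-of-day histogram over a timestamp path,
# built from a ModuloTimeBinDefinition (86400 second modulus, 3600 second
# divisor, 24 bins of size 1). Class and parameter order follow the AMiner
# API as far as known; the match path is an assumption.
#
#   from aminer.analysis.HistogramAnalysis import (
#       HistogramAnalysis, ModuloTimeBinDefinition)
#
#   moduloTimeBinDefinition = ModuloTimeBinDefinition(86400, 3600, 0, 1, 24, False)
#   histogramAnalysis = HistogramAnalysis(
#       analysisContext.aminerConfig,
#       [('/model/accesstime/time', moduloTimeBinDefinition)],
#       3600, anomalyEventHandlers)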
def __init__(self, aminerConfig, targetPath, anomalyEventHandlers,
    persistenceId='Default', autoIncludeFlag=False, defaultInterval=3600,
    realertInterval=86400, outputLogLine=True):
  """Initialize the detector. This will also trigger reading
  or creation of persistence storage location.
  @param targetPath the path to extract a source identification
  value from each log atom."""
  self.targetPath = targetPath
  self.anomalyEventHandlers = anomalyEventHandlers
  self.autoIncludeFlag = autoIncludeFlag
  self.defaultInterval = defaultInterval
  self.realertInterval = realertInterval
  # This timestamp is compared with timestamp values from log atoms
  # for activation of alerting logic. The first timestamp from logs
  # above this value will trigger alerting.
  self.nextCheckTimestamp = 0
  self.lastSeenTimestamp = 0
  self.nextPersistTime = None
  self.outputLogLine = outputLogLine
  self.aminerConfig = aminerConfig

  PersistencyUtil.addPersistableComponent(self)
  self.persistenceFileName = AMinerConfig.buildPersistenceFileName(
      aminerConfig, self.__class__.__name__, persistenceId)
  persistenceData = PersistencyUtil.loadJson(self.persistenceFileName)
  if persistenceData is None:
    self.expectedValuesDict = {}
  else:
    self.expectedValuesDict = persistenceData
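# --- Hedged usage sketch (not part of the original module) ---
# The signature above matches AMiner's MissingMatchPathValueDetector, which
# alerts when a source identified by targetPath stops sending log data
# within defaultInterval seconds. The class name is inferred, not shown in
# the snippet; the import path and match path are assumptions.
#
#   from aminer.analysis.MissingMatchPathValueDetector import \
#       MissingMatchPathValueDetector
#
#   missingValueDetector = MissingMatchPathValueDetector(
#       analysisContext.aminerConfig, '/model/host', anomalyEventHandlers,
#       autoIncludeFlag=True, defaultInterval=600)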
def loadPersistencyData(self):
  """Load the persistency data from storage."""
  self.knownValuesDict = {}
  persistenceData = PersistencyUtil.loadJson(self.persistenceFileName)
  if persistenceData is not None:
    # Dictionary keys (tuples) were stored as lists of lists because
    # JSON has no tuple type. Transform the first element of each record
    # back to a tuple to allow the hash operation needed for dictionary
    # keys.
    for valueTuple, extraData in persistenceData:
      self.knownValuesDict[tuple(valueTuple)] = extraData
def loadPersistencyData(self):
  """Load the persistency data from storage."""
  persistenceData = PersistencyUtil.loadJson(self.persistenceFileName)
  if persistenceData is None:
    self.knownValuesSet = set()
  else:
    # Set elements (tuples) were stored as a list of lists. Transform
    # the inner lists to tuples to allow the hash operation needed by set.
    self.knownValuesSet = set(
        tuple(record) for record in persistenceData)
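# --- Hedged illustration (not part of the original module) ---
# Why both loadPersistencyData() variants above re-tuple their records:
# JSON has no tuple type, so tuples come back as lists, and lists are
# unhashable, i.e. unusable as dict keys or set members. A minimal,
# self-contained round-trip (example values are hypothetical):
#
#   import json
#
#   knownValuesDict = {('serviceA', 'GET'): 42}
#   knownValuesSet = {('serviceA', 'GET')}
#   # Persist: tuples are serialized as JSON arrays (lists).
#   dictData = json.dumps(list(knownValuesDict.items()))
#   setData = json.dumps(list(knownValuesSet))
#   # Load: re-tuple the inner lists to restore hashability.
#   assert {tuple(k): v for k, v in json.loads(dictData)} == knownValuesDict
#   assert set(tuple(r) for r in json.loads(setData)) == knownValuesSet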
def __init__(self, aminerConfig, anomalyEventHandlers,
    persistenceId='Default', autoIncludeFlag=False, outputLogLine=True):
  """Initialize the detector. This will also trigger reading
  or creation of persistence storage location."""
  self.anomalyEventHandlers = anomalyEventHandlers
  self.autoIncludeFlag = autoIncludeFlag
  self.nextPersistTime = None
  self.outputLogLine = outputLogLine
  self.aminerConfig = aminerConfig

  PersistencyUtil.addPersistableComponent(self)
  self.persistenceFileName = AMinerConfig.buildPersistenceFileName(
      aminerConfig, self.__class__.__name__, persistenceId)
  persistenceData = PersistencyUtil.loadJson(self.persistenceFileName)
  if persistenceData is None:
    self.knownPathSet = set()
  else:
    self.knownPathSet = set(persistenceData)
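# --- Hedged usage sketch (not part of the original module) ---
# The signature and the knownPathSet attribute match AMiner's
# NewMatchPathDetector, which reports whenever a previously unseen parser
# path occurs. Because the persistence file name is derived from
# self.__class__.__name__, subclasses automatically get their own
# persistence files. The class name is inferred, not shown in the snippet.
#
#   from aminer.analysis.NewMatchPathDetector import NewMatchPathDetector
#
#   newPathDetector = NewMatchPathDetector(
#       analysisContext.aminerConfig, anomalyEventHandlers,
#       autoIncludeFlag=True)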
def runAnalysis(self, masterFd):
  """This method runs the analysis thread.
  @param masterFd the main communication socket to the parent
  to receive logfile updates from the parent.
  @return 0 on success, e.g. normal termination via signal,
  or 1 on error."""

  # The masterControlSocket is the socket to communicate with the
  # master process to receive commands or logstream data. Expect
  # the parent/child communication socket on fd 3. socket.fromfd()
  # duplicates the fd, so close the old one.
  self.masterControlSocket = socket.fromfd(
      masterFd, socket.AF_UNIX, socket.SOCK_DGRAM, 0)
  os.close(masterFd)
  self.trackedFdsDict[self.masterControlSocket.fileno()] = \
      self.masterControlSocket

  # Locate the real analysis configuration.
  self.analysisContext.buildAnalysisPipeline()
  if self.analysisContext.atomizerFactory is None:
    print('FATAL: buildAnalysisPipeline() did not initialize '
          'atomizerFactory, terminating', file=sys.stderr)
    return 1

  realTimeTriggeredComponents = self.analysisContext.realTimeTriggeredComponents
  analysisTimeTriggeredComponents = self.analysisContext.analysisTimeTriggeredComponents

  # Load continuation data for the last known log streams. The loaded
  # data has to be a dictionary with repositioning information for
  # each stream. The data is used only when creating the first stream
  # with that name.
  self.repositioningDataDict = PersistencyUtil.loadJson(
      self.persistenceFileName)
  if self.repositioningDataDict is None:
    self.repositioningDataDict = {}

  # A list of LogStreams where handleStream() blocked due to downstream
  # not being able to consume the data yet.
  blockedLogStreams = []

  # Always start when the trigger time is still None.
  nextRealTimeTriggerTime = None
  nextAnalysisTimeTriggerTime = None

  delayedReturnStatus = 0
  while self.runAnalysisLoopFlag:
    # Build the list of inputs to select for anew each time: the
    # LogStream file descriptors may change due to rollover.
    inputSelectFdList = []
    outputSelectFdList = []
    for fdHandlerObject in self.trackedFdsDict.values():
      if isinstance(fdHandlerObject, LogStream):
        streamFd = fdHandlerObject.getCurrentFd()
        if streamFd < 0:
          continue
        inputSelectFdList.append(streamFd)
      elif isinstance(fdHandlerObject, AnalysisChildRemoteControlHandler):
        fdHandlerObject.addSelectFds(inputSelectFdList, outputSelectFdList)
      else:
        # This has to be a socket, just add the file descriptor.
        inputSelectFdList.append(fdHandlerObject.fileno())

    # Loop over the list in reverse order to avoid skipping elements
    # while removing.
    for logStream in reversed(blockedLogStreams):
      currentStreamFd = logStream.handleStream()
      if currentStreamFd >= 0:
        self.trackedFdsDict[currentStreamFd] = logStream
        inputSelectFdList.append(currentStreamFd)
        blockedLogStreams.remove(logStream)

    readList = None
    writeList = None
    exceptList = None
    try:
      (readList, writeList, exceptList) = select.select(
          inputSelectFdList, outputSelectFdList, [], 1)
    except select.error as selectError:
      # Interrupting signals, e.g. for shutdown, are OK.
      if selectError.args[0] == errno.EINTR:
        continue
      print('Unexpected select result %s' % str(selectError),
            file=sys.stderr)
      delayedReturnStatus = 1
      break

    for readFd in readList:
      fdHandlerObject = self.trackedFdsDict[readFd]
      if isinstance(fdHandlerObject, LogStream):
        # Handle this LogStream. Only when downstream processing blocks,
        # add the stream to the blocked stream list.
        handleResult = fdHandlerObject.handleStream()
        if handleResult < 0:
          # No need to care if the current internal file descriptor in
          # the LogStream has changed in handleStream(), this will be
          # handled when unblocking.
          del self.trackedFdsDict[readFd]
          blockedLogStreams.append(fdHandlerObject)
        elif handleResult != readFd:
          # The current fd has changed, update the tracking list.
          del self.trackedFdsDict[readFd]
          self.trackedFdsDict[handleResult] = fdHandlerObject
        continue

      if isinstance(fdHandlerObject, AnalysisChildRemoteControlHandler):
        try:
          fdHandlerObject.doReceive()
        except Exception as receiveException:
          print('Unclean termination of remote control: %s' % str(
              receiveException), file=sys.stderr)
        if fdHandlerObject.isDead():
          del self.trackedFdsDict[readFd]
        else:
          # Reading is only attempted when the output buffer was already
          # flushed. Try processing the next request to fill the output
          # buffer for the next round.
          fdHandlerObject.doProcess(self.analysisContext)
        continue

      if fdHandlerObject == self.masterControlSocket:
        self.handleMasterControlSocketReceive()
        continue

      if fdHandlerObject == self.remoteControlSocket:
        # We received a remote connection, accept it unconditionally.
        # Users should make sure that they do not exhaust resources by
        # hogging open connections.
        (controlClientSocket, remoteAddress) = self.remoteControlSocket.accept()
        # Keep track of information received via this remote control socket.
        remoteControlHandler = AnalysisChildRemoteControlHandler(
            controlClientSocket)
        self.trackedFdsDict[controlClientSocket.fileno()] = remoteControlHandler
        continue

      raise Exception('Unhandled object type %s' % type(fdHandlerObject))

    for writeFd in writeList:
      fdHandlerObject = self.trackedFdsDict[writeFd]
      if isinstance(fdHandlerObject, AnalysisChildRemoteControlHandler):
        bufferFlushedFlag = False
        try:
          bufferFlushedFlag = fdHandlerObject.doSend()
        except OSError as sendError:
          print('Error sending data via remote control: %s' % str(
              sendError), file=sys.stderr)
          try:
            fdHandlerObject.terminate()
          except Exception as terminateException:
            print('Unclean termination of remote control: %s' % str(
                terminateException), file=sys.stderr)
        if bufferFlushedFlag:
          fdHandlerObject.doProcess(self.analysisContext)
        if fdHandlerObject.isDead():
          del self.trackedFdsDict[writeFd]
        continue

      raise Exception('Unhandled object type %s' % type(fdHandlerObject))

    # Handle the real time events.
    realTime = time.time()
    if (nextRealTimeTriggerTime is None) or (realTime >= nextRealTimeTriggerTime):
      nextTriggerOffset = 3600
      for component in realTimeTriggeredComponents:
        nextTriggerRequest = component.doTimer(realTime)
        nextTriggerOffset = min(nextTriggerOffset, nextTriggerRequest)
      nextRealTimeTriggerTime = realTime + nextTriggerOffset

    # Handle the analysis time events. The analysis time will be
    # different when an analysis time component is registered. Those
    # components are triggered with the analysis time, not the system time.
    analysisTime = self.analysisContext.analysisTime
    if analysisTime is None:
      analysisTime = realTime
    if (nextAnalysisTimeTriggerTime is None) or (analysisTime >= nextAnalysisTimeTriggerTime):
      nextTriggerOffset = 3600
      for component in analysisTimeTriggeredComponents:
        nextTriggerRequest = component.doTimer(analysisTime)
        nextTriggerOffset = min(nextTriggerOffset, nextTriggerRequest)
      nextAnalysisTimeTriggerTime = analysisTime + nextTriggerOffset

  # The analysis loop is only left on shutdown. Try to persist
  # everything and leave.
  PersistencyUtil.persistAll()
  return delayedReturnStatus
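# --- Hedged illustration (not part of the original module) ---
# The trigger scheduling above follows a simple pattern: every component's
# doTimer() returns the number of seconds until it wants to be called
# again, and the loop reschedules at the earliest such request, capped at
# 3600 seconds. A minimal, self-contained sketch of that pattern (the
# component class here is hypothetical):
#
#   import time
#
#   class EveryTenSeconds:
#     def doTimer(self, triggerTime):
#       print('tick at %f' % triggerTime)
#       return 10.0
#
#   components = [EveryTenSeconds()]
#   nextTriggerTime = None
#   for _ in range(3):  # stand-in for the select() loop iterations
#     realTime = time.time()
#     if (nextTriggerTime is None) or (realTime >= nextTriggerTime):
#       nextTriggerOffset = 3600
#       for component in components:
#         nextTriggerOffset = min(nextTriggerOffset, component.doTimer(realTime))
#       nextTriggerTime = realTime + nextTriggerOffset
#     time.sleep(1)  # the real loop blocks in select() for up to 1 second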