Example #1
0
    def __init__(self,
                 aminerConfig,
                 propertyPath,
                 binDefinition,
                 reportInterval,
                 reportEventHandlers,
                 resetAfterReportFlag=True,
                 persistenceId='Default'):
        """Initialize the analysis component.
    @param reportInterval delay in seconds between creation of two
    reports. The parameter is applied to the parsed record data
    time, not the system time. Hence reports can be delayed when
    no data is received."""
        self.lastReportTime = None
        self.nextReportTime = 0.0
        self.propertyPath = propertyPath
        self.binDefinition = binDefinition
        self.histogramData = {}
        self.reportInterval = reportInterval
        self.reportEventHandlers = reportEventHandlers
        self.resetAfterReportFlag = resetAfterReportFlag
        self.persistenceId = persistenceId
        self.nextPersistTime = None

        PersistencyUtil.addPersistableComponent(self)
        self.persistenceFileName = AMinerConfig.buildPersistenceFileName(
            aminerConfig, 'PathDependentHistogramAnalysis', persistenceId)
        persistenceData = PersistencyUtil.loadJson(self.persistenceFileName)
        if persistenceData is not None:
            raise Exception('Loading and merging of persistence data not yet implemented')
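
A minimal wiring sketch for this component follows. The import paths, the LinearNumericBinDefinition arguments and the '/model/...' property path are illustrative assumptions, not taken from the snippet:

    # Hedged sketch: registering a PathDependentHistogramAnalysis inside an
    # AMiner config's buildAnalysisPipeline(); module paths may differ by version.
    from aminer.analysis.HistogramAnalysis import LinearNumericBinDefinition, \
        PathDependentHistogramAnalysis
    from aminer.events import StreamPrinterEventHandler

    def buildAnalysisPipeline(analysisContext):
        # 10 bins of width 1 starting at 0, with outlier bins at both ends.
        binDefinition = LinearNumericBinDefinition(0, 1, 10, True)
        histogramAnalysis = PathDependentHistogramAnalysis(
            analysisContext.aminerConfig,
            '/model/accesslog/time',  # property path to bin: illustrative
            binDefinition,
            3600,  # one report per hour of parsed data time, not system time
            [StreamPrinterEventHandler(analysisContext)])
        analysisContext.registerComponent(histogramAnalysis, componentName=None)
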
Example #2
0
  def __init__(self, aminerConfig, anomalyEventHandlers, timestampPath,
               analyzePathList, minBinElements, minBinTime, syncBinsFlag=True,
               debugMode=False, persistenceId='Default'):
    """Initialize the detector. This will also trigger reading
    or creation of persistence storage location.
    @param timestampPath if not None, use this path value for
    timestamp based bins.
    @param analyzePathList list of match paths to analyze in
    this detector.
    @param minBinElements evaluate the latest bin only after at
    least that many elements have been added to it.
    @param minBinTime evaluate the latest bin only when the first
    element is received after minBinTime has elapsed.
    @param syncBinsFlag if true, the bins of all analyzed path
    values have to be filled sufficiently to trigger analysis.
    @param debugMode if true, generate an analysis report even
    when the average of the last bin was within the expected range."""
    self.anomalyEventHandlers = anomalyEventHandlers
    self.timestampPath = timestampPath
    self.minBinElements = minBinElements
    self.minBinTime = minBinTime
    self.syncBinsFlag = syncBinsFlag
    self.debugMode = debugMode
    self.nextPersistTime = None

    PersistencyUtil.addPersistableComponent(self)
    self.persistenceFileName = AMinerConfig.buildPersistenceFileName(
      aminerConfig, 'MatchValueAverageChangeDetector', persistenceId)
    persistenceData = PersistencyUtil.loadJson(self.persistenceFileName)
    if persistenceData is None:
      self.statData = []
      for path in analyzePathList:
        self.statData.append((path, [],))
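
An instantiation sketch matching the signature above; the event handler and the analyzed path are illustrative assumptions:

    # Hedged sketch: one match path, bins of at least 100 elements or 60 seconds.
    detector = MatchValueAverageChangeDetector(
        analysisContext.aminerConfig,
        [StreamPrinterEventHandler(analysisContext)],  # anomalyEventHandlers
        None,                      # timestampPath: None disables timestamp-based bins
        ['/model/response/size'],  # analyzePathList: illustrative match path
        100,                       # minBinElements
        60)                        # minBinTime in seconds
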
Example #3
0
    def __init__(self,
                 aminerConfig,
                 histogramDefs,
                 reportInterval,
                 reportEventHandlers,
                 resetAfterReportFlag=True,
                 persistenceId='Default'):
        """Initialize the analysis component.
    @param histogramDefs is a list of tuples containing the target
    property path to analyze and the BinDefinition to apply for
    binning.
    @param reportInterval delay in seconds between creation of two
    reports. The parameter is applied to the parsed record data
    time, not the system time. Hence reports can be delayed when
    no data is received."""
        self.lastReportTime = None
        self.nextReportTime = 0.0
        self.histogramData = []
        for (path, binDefinition) in histogramDefs:
            self.histogramData.append(HistogramData(path, binDefinition))
        self.reportInterval = reportInterval
        self.reportEventHandlers = reportEventHandlers
        self.resetAfterReportFlag = resetAfterReportFlag
        self.persistenceId = persistenceId
        self.nextPersistTime = None

        PersistencyUtil.addPersistableComponent(self)
        self.persistenceFileName = AMinerConfig.buildPersistenceFileName(
            aminerConfig, 'HistogramAnalysis', persistenceId)
        persistenceData = PersistencyUtil.loadJson(self.persistenceFileName)
        if persistenceData is not None:
            raise Exception('Loading and merging of persistence data not yet implemented')
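
Because histogramDefs is a list of (path, BinDefinition) tuples, one HistogramAnalysis instance can track several properties; a sketch with assumed paths and bin definitions:

    # Hedged sketch: two properties, each with its own binning.
    histogramAnalysis = HistogramAnalysis(
        analysisContext.aminerConfig,
        [('/model/accesslog/status', LinearNumericBinDefinition(100, 100, 5, True)),
         ('/model/accesslog/size', LinearNumericBinDefinition(0, 1024, 16, True))],
        3600,  # reportInterval in seconds of parsed data time
        [StreamPrinterEventHandler(analysisContext)])
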
Example #4
0
    def __init__(self,
                 aminerConfig,
                 targetPath,
                 anomalyEventHandlers,
                 persistenceId='Default',
                 autoIncludeFlag=False,
                 defaultInterval=3600,
                 realertInterval=86400,
                 outputLogLine=True):
        """Initialize the detector. This will also trigger reading
    or creation of persistence storage location.
    @param targetPath to extract a source identification value
    from each logatom."""
        self.targetPath = targetPath
        self.anomalyEventHandlers = anomalyEventHandlers
        self.autoIncludeFlag = autoIncludeFlag
        self.defaultInterval = defaultInterval
        self.realertInterval = realertInterval
        # This timestamp is compared with the timestamp values from log atoms
        # to activate the alerting logic. The first log timestamp above this
        # value will trigger alerting.
        self.nextCheckTimestamp = 0
        self.lastSeenTimestamp = 0
        self.nextPersistTime = None
        self.outputLogLine = outputLogLine
        self.aminerConfig = aminerConfig

        PersistencyUtil.addPersistableComponent(self)
        self.persistenceFileName = AMinerConfig.buildPersistenceFileName(
            aminerConfig, self.__class__.__name__, persistenceId)
        persistenceData = PersistencyUtil.loadJson(self.persistenceFileName)
        if persistenceData is None:
            self.expectedValuesDict = {}
        else:
            self.expectedValuesDict = persistenceData
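
The snippet derives its persistence file name from self.__class__.__name__, so the concrete class name is not visible here; assuming a missing-value style detector (the name MissingMatchPathValueDetector is an assumption), an instantiation could look like:

    # Hedged sketch; the class name is assumed, the parameters mirror the
    # __init__ signature above.
    detector = MissingMatchPathValueDetector(
        analysisContext.aminerConfig,
        '/model/host/name',  # targetPath: source identification value, illustrative
        [StreamPrinterEventHandler(analysisContext)],
        autoIncludeFlag=True,   # learn new sources automatically
        defaultInterval=3600,   # expect each source at least once per hour
        realertInterval=86400)  # re-alert at most once per day and source
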
Example #5
0
    def loadPersistencyData(self):
        """Load the persistence data from storage."""
        self.knownValuesDict = {}
        persistenceData = PersistencyUtil.loadJson(self.persistenceFileName)
        if persistenceData is not None:
            # Dictionary keys and tuples were stored as lists of lists.
            # Transform the inner lists back to tuples to allow the hash
            # operation needed for dictionary keys.
            for valueTuple, extraData in persistenceData:
                self.knownValuesDict[tuple(valueTuple)] = extraData
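
The list-to-tuple transform is needed because JSON has neither tuples nor non-string dictionary keys; a self-contained round trip with the plain json module shows the scheme (PersistencyUtil is not required for the point):

    import json

    knownValuesDict = {('login', 'alice'): 3, ('logout', 'bob'): 1}
    # Tuple keys cannot be encoded as JSON object keys, so the dictionary is
    # stored as a list of [key-as-list, value] pairs.
    encoded = json.dumps(
        [[list(key), value] for key, value in knownValuesDict.items()])
    # After loading, keys come back as lists and must become tuples again to
    # be hashable.
    restored = {tuple(key): value for key, value in json.loads(encoded)}
    assert restored == knownValuesDict
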
Example #6
0
    def loadPersistencyData(self):
        """Load the persistence data from storage."""
        persistenceData = PersistencyUtil.loadJson(self.persistenceFileName)
        if persistenceData is None:
            self.knownValuesSet = set()
        else:
            # Sets and tuples were stored as lists of lists. Transform the
            # inner lists back to tuples to allow the hash operation needed
            # by the set.
            self.knownValuesSet = set(
                [tuple(record) for record in persistenceData])
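
The write side of the same scheme serializes the tuples back to lists; a sketch assuming PersistencyUtil provides a storeJson() counterpart to loadJson() (that function is an assumption, it does not appear in the snippets):

    def doPersist(self):
        """Persist the known values set. storeJson() is assumed to mirror
        loadJson() and is not confirmed by the snippets above."""
        PersistencyUtil.storeJson(
            self.persistenceFileName,
            [list(record) for record in self.knownValuesSet])
        self.nextPersistTime = None
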
Example #7
0
  def __init__(self, aminerConfig, anomalyEventHandlers,
               persistenceId='Default', autoIncludeFlag=False, outputLogLine=True):
    """Initialize the detector. This will also trigger reading
    or creation of persistence storage location."""
    self.anomalyEventHandlers = anomalyEventHandlers
    self.autoIncludeFlag = autoIncludeFlag
    self.nextPersistTime = None
    self.outputLogLine = outputLogLine
    self.aminerConfig = aminerConfig

    PersistencyUtil.addPersistableComponent(self)
    self.persistenceFileName = AMinerConfig.buildPersistenceFileName(
        aminerConfig, self.__class__.__name__, persistenceId)
    persistenceData = PersistencyUtil.loadJson(self.persistenceFileName)
    if persistenceData is None:
      self.knownPathSet = set()
    else:
      self.knownPathSet = set(persistenceData)
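
The knownPathSet attribute suggests a detector for previously unseen parser paths; assuming this is AMiner's NewMatchPathDetector (an assumption based on the attribute names), registration could look like:

    # Hedged sketch: register the detector and feed it parsed atoms through an
    # existing atom filter; atomFilter is assumed to be set up elsewhere.
    detector = NewMatchPathDetector(
        analysisContext.aminerConfig,
        [StreamPrinterEventHandler(analysisContext)],
        autoIncludeFlag=True)  # learn new paths instead of re-alerting on each
    analysisContext.registerComponent(detector, componentName=None)
    atomFilter.addHandler(detector)
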
Example #8
0
    def runAnalysis(self, masterFd):
        """This method runs the analysis thread.
    @param masterFd the main communication socket to the parent
    to receive logfile updates from the parent.
    @return 0 on success, e.g. normal termination via signal, or
    1 on error."""

        # The masterControlSocket is the socket to communicate with the
        # master process to receive commands or logstream data. Expect
        # the parent/child communication socket on fd 3. This also duplicates
        # the fd, so close the old one.
        self.masterControlSocket = socket.fromfd(masterFd, socket.AF_UNIX,
                                                 socket.SOCK_DGRAM, 0)
        os.close(masterFd)
        self.trackedFdsDict[self.masterControlSocket.fileno()] = \
            self.masterControlSocket

        # Locate the real analysis configuration.
        self.analysisContext.buildAnalysisPipeline()
        if self.analysisContext.atomizerFactory is None:
            print('FATAL: buildAnalysisPipeline() did ' \
                'not initialize atomizerFactory, terminating', file=sys.stderr)
            return 1

        realTimeTriggeredComponents = self.analysisContext.realTimeTriggeredComponents
        analysisTimeTriggeredComponents = self.analysisContext.analysisTimeTriggeredComponents

        # Load continuation data for last known log streams. The loaded
        # data has to be a dictionary with repositioning information for
        # each stream. The data is used only when creating the first stream
        # with that name.
        self.repositioningDataDict = PersistencyUtil.loadJson(
            self.persistenceFileName)
        if self.repositioningDataDict is None:
            self.repositioningDataDict = {}

        # A list of LogStreams where handleStream() blocked due to downstream
        # not being able to consume the data yet.
        blockedLogStreams = []

        # Always trigger immediately when the next trigger time is still None.
        nextRealTimeTriggerTime = None
        nextAnalysisTimeTriggerTime = None

        delayedReturnStatus = 0
        while self.runAnalysisLoopFlag:
            # Rebuild the lists of file descriptors to select on each
            # iteration: the LogStream file descriptors may change due to rollover.
            inputSelectFdList = []
            outputSelectFdList = []
            for fdHandlerObject in self.trackedFdsDict.values():
                if isinstance(fdHandlerObject, LogStream):
                    streamFd = fdHandlerObject.getCurrentFd()
                    if streamFd < 0:
                        continue
                    inputSelectFdList.append(streamFd)
                elif isinstance(fdHandlerObject,
                                AnalysisChildRemoteControlHandler):
                    fdHandlerObject.addSelectFds(inputSelectFdList,
                                                 outputSelectFdList)
                else:
                    # This has to be a socket, just add the file descriptor.
                    inputSelectFdList.append(fdHandlerObject.fileno())

            # Loop over the list in reverse order to avoid skipping elements
            # when removing them from the list.
            for logStream in reversed(blockedLogStreams):
                currentStreamFd = logStream.handleStream()
                if currentStreamFd >= 0:
                    self.trackedFdsDict[currentStreamFd] = logStream
                    inputSelectFdList.append(currentStreamFd)
                    blockedLogStreams.remove(logStream)

            readList = None
            writeList = None
            exceptList = None
            try:
                (readList, writeList,
                 exceptList) = select.select(inputSelectFdList,
                                             outputSelectFdList, [], 1)
            except select.error as selectError:
                # Interrupting signals, e.g. for shutdown, are OK.
                if selectError.errno == errno.EINTR:
                    continue
                print('Unexpected select result %s' % str(selectError),
                      file=sys.stderr)
                delayedReturnStatus = 1
                break
            for readFd in readList:
                fdHandlerObject = self.trackedFdsDict[readFd]
                if isinstance(fdHandlerObject, LogStream):
                    # Handle this LogStream. Only when downstream processing blocks,
                    # add the stream to the blocked stream list.
                    handleResult = fdHandlerObject.handleStream()
                    if handleResult < 0:
                        # No need to care if current internal file descriptor in LogStream
                        # has changed in handleStream(), this will be handled when unblocking.
                        del self.trackedFdsDict[readFd]
                        blockedLogStreams.append(fdHandlerObject)
                    elif handleResult != readFd:
                        # The current fd has changed, update the tracking list.
                        del self.trackedFdsDict[readFd]
                        self.trackedFdsDict[handleResult] = fdHandlerObject
                    continue

                if isinstance(fdHandlerObject,
                              AnalysisChildRemoteControlHandler):
                    try:
                        fdHandlerObject.doReceive()
                    except Exception as receiveException:
                        print('Unclean termination of remote ' \
                            'control: %s' % str(receiveException), file=sys.stderr)
                    if fdHandlerObject.isDead():
                        del self.trackedFdsDict[readFd]
                    else:
                        # Reading is only attempted when the output buffer was
                        # already flushed. Try processing the next request to
                        # fill the output buffer for the next round.
                        fdHandlerObject.doProcess(self.analysisContext)
                    continue

                if fdHandlerObject == self.masterControlSocket:
                    self.handleMasterControlSocketReceive()
                    continue

                if fdHandlerObject == self.remoteControlSocket:
                    # We received a remote connection, accept it unconditionally.
                    # Users should make sure that they do not exhaust
                    # resources by hogging open connections.
                    (controlClientSocket, remoteAddress) = \
                        self.remoteControlSocket.accept()
                    # Keep track of information received via this remote control socket.
                    remoteControlHandler = AnalysisChildRemoteControlHandler(
                        controlClientSocket)
                    self.trackedFdsDict[
                        controlClientSocket.fileno()] = remoteControlHandler
                    continue
                raise Exception('Unhandled object type %s' %
                                type(fdHandlerObject))

            for writeFd in writeList:
                fdHandlerObject = self.trackedFdsDict[writeFd]
                if isinstance(fdHandlerObject,
                              AnalysisChildRemoteControlHandler):
                    bufferFlushedFlag = False
                    try:
                        bufferFlushedFlag = fdHandlerObject.doSend()
                    except OSError as sendError:
                        print('Error sending data via remote ' \
                            'control: %s' % str(sendError), file=sys.stderr)
                        try:
                            fdHandlerObject.terminate()
                        except Exception as terminateException:
                            print('Unclean termination of remote ' \
                                'control: %s' % str(terminateException), file=sys.stderr)
                    if bufferFlushedFlag:
                        fdHandlerObject.doProcess(self.analysisContext)
                    if fdHandlerObject.isDead():
                        del self.trackedFdsDict[writeFd]
                    continue
                raise Exception('Unhandled object type %s' %
                                type(fdHandlerObject))

            # Handle the real-time events.
            realTime = time.time()
            if nextRealTimeTriggerTime is None or realTime >= nextRealTimeTriggerTime:
                nextTriggerOffset = 3600
                for component in realTimeTriggeredComponents:
                    nextTriggerRequest = component.doTimer(realTime)
                    nextTriggerOffset = min(nextTriggerOffset,
                                            nextTriggerRequest)
                nextRealTimeTriggerTime = realTime + nextTriggerOffset

            # Handle the analysis time events. The analysis time will differ
            # from real time when an analysis time component is registered.
            analysisTime = self.analysisContext.analysisTime
            if analysisTime is None:
                analysisTime = realTime
            if nextAnalysisTimeTriggerTime is None or analysisTime >= nextAnalysisTimeTriggerTime:
                nextTriggerOffset = 3600
                for component in analysisTimeTriggeredComponents:
                    nextTriggerRequest = component.doTimer(realTime)
                    nextTriggerOffset = min(nextTriggerOffset,
                                            nextTriggerRequest)
                nextAnalysisTimeTriggerTime = analysisTime + nextTriggerOffset

        # The analysis loop is only left on shutdown. Try to persist
        # everything and leave.
        PersistencyUtil.persistAll()
        return delayedReturnStatus
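
At its core, runAnalysis() is a select() event loop over a changing set of file descriptors. A stripped-down, self-contained sketch of the same pattern, with a socketpair standing in for the master control socket:

    import errno
    import select
    import socket

    parentSock, childSock = socket.socketpair(socket.AF_UNIX, socket.SOCK_DGRAM)
    parentSock.send(b'log stream data')

    trackedFdsDict = {childSock.fileno(): childSock}
    runFlag = True
    while runFlag:
        try:
            # Rebuild the select list each round, as runAnalysis() does,
            # because the tracked descriptors may change over time.
            readList, writeList, exceptList = select.select(
                list(trackedFdsDict), [], [], 1)
        except OSError as selectError:
            # Interrupted system calls, e.g. during shutdown, are OK.
            if selectError.errno == errno.EINTR:
                continue
            raise
        for readFd in readList:
            data = trackedFdsDict[readFd].recv(1 << 16)
            print('received %r' % data)
            runFlag = False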