예제 #1
0
 def single_queue_it(self, count, remaining_count=None):
     """
     Queue one generator plugin run for the current interval (single-process mode).

     The sample's earliest and latest times are resolved first. When ``count`` is usable,
     a generator plugin is created, configured and put on the generator queue.

     :param count: Amount of work handed to the generator plugin. Values below 1 are
                   skipped, except -1 which is still queued.
     :param remaining_count: Accepted for interface compatibility; not used by this method.
     :return: None
     """
     window_start = self.sample.earliestTime()
     window_end = self.sample.latestTime()

     # Guard clause: nothing to do for a non-positive count other than the special -1.
     if count < 1 and count != -1:
         logger.info(
             "There is no data to be generated in worker {0} because the count is {1}."
             .format(self.sample.config.generatorWorkers, count))
         return

     plugin = self.generatorPlugin(sample=self.sample)
     # Adjust queue for threading mode
     plugin.updateConfig(config=self.config, outqueue=self.outputQueue)
     plugin.updateCounts(count=count,
                         start_time=window_start,
                         end_time=window_end)
     try:
         self.generatorQueue.put(plugin)
         queued_template = ("Put {0} MB of events in queue for sample '{1}'" +
                            "with et '{2}' and lt '{3}'")
         size_in_mb = round((count / 1024.0 / 1024), 4)
         logger.info(
             queued_template.format(size_in_mb, self.sample.name,
                                    window_start, window_end))
     except Full:
         logger.warning(
             "Generator Queue Full. Skipping current generation.")
예제 #2
0
    def flush(self, q):
        """
        Write every event in ``q`` to the output file, rotating backups when it fills up.

        Each item of ``q`` is expected to expose ``get("_raw")``; items with an empty or missing
        ``_raw`` are skipped and a trailing newline is appended when absent. When adding a message
        would push the tracked length past ``self._fileMaxBytes``, the current file is closed,
        existing backups ``<file>.1 .. <file>.N`` are shifted up by one (the oldest,
        ``<file>.<_fileBackupFiles>``, is removed) and a fresh file is opened.

        The file handle is always closed before this method returns, including when an
        unexpected error escapes the write loop, so a failed flush does not leak the handle.

        :param q: Sized iterable of event mappings to write. Nothing happens when it is empty.
        :return: None
        """
        if len(q) == 0:
            return

        logger.debug(
            "Flushing output for sample '%s' in app '%s' for queue '%s'" %
            (self._sample.name, self._app, self._sample.source))

        try:
            # Messages are written one by one; a write may take the file slightly past
            # maxFileBytes only through the length bookkeeping, never by skipping rotation.
            try:
                for metamsg in q:
                    msg = metamsg.get("_raw")
                    if not msg:
                        continue
                    if msg[-1] != "\n":
                        msg += "\n"

                    if self._fileLength + len(msg) <= self._fileMaxBytes:
                        self._fileHandle.write(msg)
                        self._fileLength += len(msg)
                        continue

                    # The file is full: close it and rotate the backups before writing.
                    self._fileHandle.flush()
                    self._fileHandle.close()

                    oldest_backup = self._file + "." + str(self._fileBackupFiles)
                    if os.path.exists(oldest_backup):
                        logger.debug("File Output: Removing file: %s" %
                                     oldest_backup)
                        os.unlink(oldest_backup)

                    # Walk from the newest-but-one slot down to 1 so no backup is overwritten.
                    for x in range(int(self._fileBackupFiles) - 1, 0, -1):
                        current_backup = self._file + "." + str(x)
                        logger.debug("File Output: Checking for file: %s" %
                                     current_backup)
                        if os.path.exists(current_backup):
                            next_backup = self._file + "." + str(x + 1)
                            logger.debug(
                                "File Output: Renaming file %s to %s" %
                                (current_backup, next_backup))
                            os.rename(current_backup, next_backup)

                    os.rename(self._file, self._file + ".1")
                    self._fileHandle = open(self._file, "w")
                    self._fileHandle.write(msg)
                    self._fileLength = len(msg)
            except IndexError:
                logger.warning(
                    "IndexError when writting for app '%s' sample '%s'" %
                    (self._app, self._sample.name))

            if not self._fileHandle.closed:
                self._fileHandle.flush()
            logger.debug("Queue for app '%s' sample '%s' written" %
                         (self._app, self._sample.name))
        finally:
            # close() is a no-op on an already closed handle, so this is safe after a
            # rotation that failed part-way through.
            self._fileHandle.close()
예제 #3
0
 def getTSFromEvent(self, event, passed_token=None):
     """
     Find and parse the timestamp carried by ``event``.

     Each candidate token is searched against the event and the first match that parses to a
     ``datetime.datetime`` wins. A token whose replacement is ``"%s"`` is read as an epoch
     value; any other replacement is used as a ``strptime`` format.

     :param event: Raw event text to search.
     :param passed_token: Optional single token to test instead of ``self.tokens``. When it is
                          given, a failure raises without logging a warning first.
     :return: The parsed ``datetime.datetime``. A parsed year of 1900 (what ``strptime`` yields
              when the format carries no year) is replaced by the year of ``self.now()``.
     :raises ValueError: If no token produces a timestamp.
     """
     currentTime = None
     formats = []
     # JB: 2012/11/20 - Can we optimize this by only testing tokens of type = *timestamp?
     # JB: 2012/11/20 - Alternatively, documentation should suggest putting timestamp as token.0.
     tokens = self.tokens if passed_token is None else [passed_token]
     for token in tokens:
         # Reset per token so the ValueError handler never reports unbound or stale values.
         timeString = None
         timeFormat = None
         try:
             formats.append(token.token)
             results = token._search(event)
             if not results:
                 continue
             timeFormat = token.replacement
             # Use the first capture group when the pattern defines one, else the whole match.
             group = 0 if len(results.groups()) == 0 else 1
             timeString = results.group(group)
             if timeFormat == "%s":
                 # Epoch value: scale down to seconds when the integer part has more than
                 # 10 digits (e.g. milliseconds). Only whole digits are counted, so a
                 # fractional epoch such as "1600000000.5" is not divided by mistake.
                 whole_digits = len(timeString.split(".", 1)[0])
                 ts = float(timeString)
                 if whole_digits > 10:
                     ts = ts / (10 ** (whole_digits - 10))
                 currentTime = datetime.datetime.fromtimestamp(ts)
             else:
                 # Working around Python bug with a non thread-safe strptime. Randomly get AttributeError
                 # when calling strptime, so if we get that, try again
                 while currentTime is None:
                     try:
                         # Drop a trailing "+HHMM" timezone adjustment. The length guard keeps
                         # matches shorter than 5 characters from raising IndexError.
                         if len(timeString) >= 5 and timeString[-5] == "+":
                             timeString = timeString[:-5]
                         currentTime = datetime.datetime.strptime(timeString, timeFormat)
                     except AttributeError:
                         pass
             logger.debug("Match '%s' Format '%s' result: '%s'" % (timeString, timeFormat, currentTime))
             if isinstance(currentTime, datetime.datetime):
                 break
         except ValueError:
             logger.warning("Match found ('%s') but time parse failed. Timeformat '%s' Event '%s'" %
                                 (timeString, timeFormat, event))
     if not isinstance(currentTime, datetime.datetime):
         # Total fail
         if passed_token is None:  # If we're running for autotimestamp don't log error
             logger.warning(
                 "Can't find a timestamp (using patterns '%s') in this event: '%s'." % (formats, event))
         raise ValueError("Can't find a timestamp (using patterns '%s') in this event: '%s'." % (formats, event))
     # Check to make sure we parsed a year
     if currentTime.year == 1900:
         currentTime = currentTime.replace(year=self.now().year)
     # 11/3/14 CS Setting self.timestamp here was breaking replay mode (the only user of
     #            getTSFromEvent), so it stays disabled; the reason it was added on 6/14/14
     #            was never recorded.
     return currentTime
예제 #4
0
    def flush(self, endOfInterval=False):
        """
        Hand all currently queued events to a newly created output plugin.

        ``endOfInterval`` is accepted but has no effect at present: interval-based flushing
        (the maxIntervalsBeforeFlush tunable) is switched off, so every call flushes.
        """
        # TODO: Fix interval flushing somehow with a queue, not sure I even want to support this feature anymore.
        # TODO: Flushing is unconditional for the time being while we decide if we want this feature.
        pending = self._queue
        logger.debug("Flushing queue for sample '%s' with size %d" %
                     (self._sample.name, len(pending)))
        self._queue = []

        outputer = self.outputPlugin(self._sample, self.output_counter)
        outputer.updateConfig(self.config)
        outputer.set_events(pending)

        # When an outputQueue is used, the outputer has to run single threaded, so it is put back on the
        # outputQueue for a single worker to execute. Without an outputQueue it may be run by multiple
        # processes or threads, so it is simply executed here.
        # A plugin that requires the output queue wins over the user's useOutputQueue setting.
        if self.outputPlugin.useOutputQueue or self.config.useOutputQueue:
            try:
                self.outputQueue.put(outputer)
            except Full:
                logger.warning("Output Queue full, looping again")
            return

        if self.config.splunkEmbedded:
            raw_sizes = [len(event['_raw']) for event in pending]
            if raw_sizes:
                stamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
                metrics_logger.info({
                    'timestamp': stamp,
                    'sample': self._sample.name,
                    'events': len(raw_sizes),
                    'bytes': sum(raw_sizes),
                })
            raw_sizes = None
        outputer.run()
예제 #5
0
    def queue_it(self, count):
        """
        Dispatch generator work covering the sample's backfill window.

        The backfill setting is split into a sign ("-" when its first character contains "-",
        otherwise "+"), an amount (all digit characters) and a unit (every other character
        except "-"). Starting from that offset relative to now, one generator plugin is
        created per interval until the present is reached.

        Replay samples are run inline exactly once and leave ``backfilldone`` untouched;
        all other generators are put on the generator queue and mark the backfill as done.
        Any exception is logged rather than propagated.

        :param count: Count passed to every generator plugin created here.
        :return: None
        """
        try:
            now = self.sample.now(realnow=True)
            sign = "-" if "-" in self.sample.backfill[0] else "+"
            spec = self.sample.backfill
            amount = "".join(ch for ch in spec if ch.isdigit())
            unit = "".join(ch for ch in spec
                           if not ch.isdigit() and ch != "-")
            cursor = timeParserTimeMath(
                plusminus=sign,
                num=amount,
                unit=unit,
                ret=now,
            )
            while cursor < now:
                window_end = timeParserTimeMath(plusminus="+",
                                                num=self.sample.interval,
                                                unit="s",
                                                ret=cursor)
                plugin = self.generatorPlugin(sample=self.sample)
                plugin.updateCounts(count=count,
                                    start_time=cursor,
                                    end_time=window_end)
                plugin.updateConfig(config=self.config,
                                    outqueue=self.outputQueue)
                try:
                    # Replay has a dynamic event duration, so it is run in place: the interval
                    # only starts counting once the replay has finished.
                    if self.sample.generator == "replay":
                        plugin.run()
                    else:
                        self.generatorQueue.put(plugin)
                except Full:
                    logger.warning(
                        "Generator Queue Full. Skipping current generation.")
                # Replay iterates its file in reverse and handles the whole backfill on its
                # first run, so jump straight to "now"; otherwise advance one interval.
                cursor = now if self.sample.generator == "replay" else window_end
            if self.sample.generator != "replay":
                self.sample.backfilldone = True

        except Exception as e:
            logger.error("Failed queuing backfill, exception: {0}".format(e))
예제 #6
0
 def gen(self, count, earliest, latest, samplename=None):
     """
     Emit ``count`` evenly spaced WINDBAG events between ``earliest`` and ``latest``.

     The window is divided into ``count`` equal steps and one message is sent per step through
     ``self._out.send``, the last one stamped exactly at ``latest``.

     :param count: Number of events to emit. A negative value falls back to 60 (with a
                   warning); zero emits nothing.
     :param earliest: ``datetime`` marking the start of the window.
     :param latest: ``datetime`` marking the end of the window.
     :param samplename: Unused here; kept for interface compatibility.
     :return: Always 0.
     """
     if count < 0:
         logger.warning(
             'Sample size not found for count=-1 and generator=windbag, defaulting to count=60'
         )
         count = 60
     if count == 0:
         # Nothing was requested; returning here avoids dividing the window by zero.
         return 0
     time_interval = (latest - earliest).total_seconds() / count
     for event_number in range(1, count + 1):
         current_time_object = earliest + datetime.timedelta(
             seconds=time_interval * event_number)
         msg = '{0} -0700 WINDBAG Event {1} of {2}'.format(
             current_time_object, event_number, count)
         self._out.send(msg)
     return 0
예제 #7
0
    def single_queue_it(self, count):
        """
        Queue one generator plugin run for this interval, or run it inline for replay samples.

        Replay samples are executed directly so the caller only continues once the replay has
        finished; every other generator is put on the generator queue.

        :param count: Amount of work handed to the generator plugin. Values below 1 are
                      skipped, except -1 which is still processed.
        :return: None
        """
        et = self.sample.earliestTime()
        lt = self.sample.latestTime()
        if count < 1 and count != -1:
            logger.info(
                "There is no data to be generated in worker {0} because the count is {1}.".format(
                    self.sample.config.generatorWorkers, count
                )
            )
            return

        genPlugin = self.generatorPlugin(sample=self.sample)
        # Adjust queue for threading mode
        genPlugin.updateCounts(count=count, start_time=et, end_time=lt)
        genPlugin.updateConfig(config=self.config, outqueue=self.outputQueue)
        try:
            logger.info(
                (
                    "Put {0} MB of events in queue for sample '{1}'"
                    + "with et '{2}' and lt '{3}'"
                ).format(
                    round((count / 1024.0 / 1024), 4), self.sample.name, et, lt
                )
            )
            if self.sample.generator == "replay":
                # lock on to replay mode, this will keep the timer knowing when to continue cycles since
                # replay mode has a dynamic replay time and interval doesn't mean the same thing.
                # A config without an outputCounter attribute yields None here instead of leaving
                # output_counter unbound, which previously raised UnboundLocalError.
                counter_setting = getattr(self.config, "outputCounter", None)
                if counter_setting:
                    from splunk_eventgen.lib.outputcounter import OutputCounter

                    output_counter = OutputCounter()
                else:
                    # Pass the falsy setting through unchanged, as before.
                    output_counter = counter_setting
                genPlugin.run(output_counter=output_counter)
            else:
                self.generatorQueue.put(genPlugin)
        except Full:
            logger.warning("Generator Queue Full. Skipping current generation.")
예제 #8
0
 def flush(self, endOfInterval=False):
     """
     Drain the internal event queue into a newly created output plugin.

     ``endOfInterval`` is accepted but not consulted: flushing is unconditional, so the
     maxIntervalsBeforeFlush behaviour is not applied here.
     """
     events = self._queue
     logger.debug("Flushing queue for sample '%s' with size %d" %
                  (self._sample.name, len(events)))
     self._queue = []

     outputer = self.outputPlugin(self._sample, self.output_counter)
     outputer.updateConfig(self.config)
     outputer.set_events(events)

     # With an outputQueue the outputer must run single threaded, so it goes back onto the
     # outputQueue for a single worker to execute. Without one it can be run by multiple
     # processes or threads, so it is executed right here.
     # A plugin that requires the output queue overrides the user's useOutputQueue setting.
     must_queue = self.outputPlugin.useOutputQueue or self.config.useOutputQueue
     if must_queue:
         try:
             self.outputQueue.put(outputer)
         except Full:
             logger.warning("Output Queue full, looping again")
     else:
         if self.config.splunkEmbedded:
             sizes = [len(item['_raw']) for item in events]
             if sizes:
                 record = {
                     'timestamp':
                     datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                     'sample': self._sample.name,
                     'events': len(sizes),
                     'bytes': sum(sizes),
                 }
                 metrics_logger.info(record)
             sizes = None
         outputer.run()
     events = None
예제 #9
0
 def queue_it(self, count):
     """
     Queue a generator plugin for this interval, carrying over volume too small to emit.

     ``count`` is first increased by whatever was left over from earlier intervals. If the
     total is still positive but smaller than one raw event, it is stored as the new leftover
     and nothing is queued for this interval; queuing it as well would count that volume
     twice. Otherwise the leftover is cleared and a generator plugin is queued for the total.

     :param count: Volume for the current interval, before the leftover is added.
     :return: None
     """
     count = count + self.previous_count_left
     if 0 < count < self.raweventsize:
         logger.info(
             "current interval size is {}, which is smaller than a raw event size {}."
             .format(count, self.raweventsize) + "Wait for the next turn.")
         self.update_options(previous_count_left=count)
         # Defer the work: the stored leftover is added to the next interval's count.
         return
     self.update_options(previous_count_left=0)

     et = self.sample.earliestTime()
     lt = self.sample.latestTime()
     # self.generatorPlugin is a factory here; calling it builds the plugin for this run.
     genPlugin = self.generatorPlugin(sample=self.sample)
     # Adjust queue for threading mode
     genPlugin.updateConfig(config=self.config, outqueue=self.outputQueue)
     genPlugin.updateCounts(count=count, start_time=et, end_time=lt)
     try:
         self.generatorQueue.put(genPlugin)
     except Full:
         logger.warning(
             "Generator Queue Full. Skipping current generation.")
예제 #10
0
    def real_run(self):
        """
        Worker function of the Timer class.  Determine whether a plugin is queueable, and either
        place an item in the generator queue for that plugin or call the plugin's gen method directly.

        Overall flow, as implemented below:

        1. Optionally sleep for ``self.sample.delay`` seconds.
        2. If ``self.end`` is 0 the loop is never entered; -1 means run until stopped.
        3. Loop until ``self.config.stopping`` or ``self.stopping`` is set, or an end condition
           is met. While ``self.countdown`` is positive the loop only sleeps ``self.time`` seconds
           and decrements the countdown. Once it reaches zero, either the one-off backfill is
           dispatched or a regular generation pass queues one generator plugin per configured
           generator worker, after which the countdown is reset to ``self.interval``.

        NOTE(review): ``self.rater.rate()`` is called at the top of every loop iteration and
        again inside the regular generation pass; confirm the rater tolerates being called twice.
        """
        if self.sample.delay > 0:
            logger.info("Sample set to delay %s, sleeping." %
                        self.sample.delay)
            time.sleep(self.sample.delay)

        logger.debug("Timer creating plugin for '%s'" % self.sample.name)

        end = False
        # Volume carried over between intervals for the perdayvolumegenerator (see below).
        previous_count_left = 0
        raw_event_size = self.predict_event_size()
        if self.end:
            if int(self.end) == 0:
                logger.info(
                    "End = 0, no events will be generated for sample '%s'" %
                    self.sample.name)
                end = True
            elif int(self.end) == -1:
                logger.info(
                    "End is set to -1. Will be running without stopping for sample %s"
                    % self.sample.name)
        while not end:
            # Need to be able to stop threads by the main thread or this thread. self.config will stop all threads
            # referenced in the config object, while, self.stopping will only stop this one.
            if self.config.stopping or self.stopping:
                end = True
                continue
            count = self.rater.rate()
            # First run of the generator, see if we have any backfill work to do.
            if self.countdown <= 0:
                if self.sample.backfill and not self.sample.backfilldone:
                    realtime = self.sample.now(realnow=True)
                    # Split the backfill setting into sign, digits (amount) and the remaining
                    # characters (unit) before converting it to a start time.
                    if "-" in self.sample.backfill[0]:
                        mathsymbol = "-"
                    else:
                        mathsymbol = "+"
                    backfillnumber = ""
                    backfillletter = ""
                    for char in self.sample.backfill:
                        if char.isdigit():
                            backfillnumber += char
                        elif char != "-":
                            backfillletter += char
                    backfillearliest = timeParserTimeMath(plusminus=mathsymbol,
                                                          num=backfillnumber,
                                                          unit=backfillletter,
                                                          ret=realtime)
                    # Dispatch one generator per interval until the backfill catches up with now.
                    while backfillearliest < realtime:
                        if self.end and self.executions == int(self.end):
                            logger.info(
                                "End executions %d reached, ending generation of sample '%s'"
                                % (int(self.end), self.sample.name))
                            break
                        et = backfillearliest
                        lt = timeParserTimeMath(plusminus="+",
                                                num=self.interval,
                                                unit="s",
                                                ret=et)
                        # Shallow-copy the sample but deep-copy its tokens so each generator
                        # gets its own token objects.
                        copy_sample = copy.copy(self.sample)
                        tokens = copy.deepcopy(self.sample.tokens)
                        copy_sample.tokens = tokens
                        genPlugin = self.generatorPlugin(sample=copy_sample)
                        # need to make sure we set the queue right if we're using multiprocessing or thread modes
                        genPlugin.updateConfig(config=self.config,
                                               outqueue=self.outputQueue)
                        genPlugin.updateCounts(count=count,
                                               start_time=et,
                                               end_time=lt)
                        try:
                            # Blocking put with a 3 second timeout; on timeout the same window
                            # is retried on the next pass of this loop.
                            self.generatorQueue.put(genPlugin, True, 3)
                            self.executions += 1
                            backfillearliest = lt
                        except Full:
                            logger.warning(
                                "Generator Queue Full. Reput the backfill generator task later. %d backfill generators are dispatched.",
                                self.executions)
                            backfillearliest = et
                        # Refresh "now" so the backfill keeps chasing the current time.
                        realtime = self.sample.now(realnow=True)

                    self.sample.backfilldone = True
                else:
                    # 12/15/13 CS Moving the rating to a separate plugin architecture
                    # Save previous interval count left to avoid perdayvolumegenerator drop small tasks
                    if self.sample.generator == 'perdayvolumegenerator':
                        count = self.rater.rate() + previous_count_left
                        if 0 < count < raw_event_size:
                            logger.info(
                                "current interval size is {}, which is smaller than a raw event size {}."
                                .format(count, raw_event_size) +
                                "Wait for the next turn.")
                            # Carry the volume over and skip generation for this interval; the
                            # skipped interval still counts as an execution.
                            previous_count_left = count
                            self.countdown = self.interval
                            self.executions += 1
                            continue
                        else:
                            previous_count_left = 0
                    else:
                        count = self.rater.rate()

                    et = self.sample.earliestTime()
                    lt = self.sample.latestTime()

                    try:
                        if count < 1 and count != -1:
                            logger.info(
                                "There is no data to be generated in worker {0} because the count is {1}."
                                .format(self.sample.config.generatorWorkers,
                                        count))
                        else:
                            # Spawn workers at the beginning of job rather than wait for next interval
                            logger.info(
                                "Starting '%d' generatorWorkers for sample '%s'"
                                % (self.sample.config.generatorWorkers,
                                   self.sample.name))
                            # Every worker receives its own plugin with the same count and window.
                            for worker_id in range(
                                    self.config.generatorWorkers):
                                copy_sample = copy.copy(self.sample)
                                tokens = copy.deepcopy(self.sample.tokens)
                                copy_sample.tokens = tokens
                                genPlugin = self.generatorPlugin(
                                    sample=copy_sample)
                                # Adjust queue for threading mode
                                genPlugin.updateConfig(
                                    config=self.config,
                                    outqueue=self.outputQueue)
                                genPlugin.updateCounts(count=count,
                                                       start_time=et,
                                                       end_time=lt)

                                try:
                                    self.generatorQueue.put(genPlugin)
                                    logger.debug((
                                        "Worker# {0}: Put {1} MB of events in queue for sample '{2}'"
                                        + "with et '{3}' and lt '{4}'").format(
                                            worker_id,
                                            round((count / 1024.0 / 1024), 4),
                                            self.sample.name, et, lt))
                                except Full:
                                    logger.warning(
                                        "Generator Queue Full. Skipping current generation."
                                    )
                            self.executions += 1
                    except Exception as e:
                        # Best effort: log and keep the timer alive unless we are stopping.
                        logger.exception(str(e))
                        if self.stopping:
                            end = True
                        pass

                # Sleep until we're supposed to wake up and generate more events
                self.countdown = self.interval

                # 8/20/15 CS Adding support for ending generation at a certain time

                if self.end:
                    if int(self.end) == -1:
                        time.sleep(self.time)
                        self.countdown -= self.time
                        continue
                    # 3/16/16 CS Adding support for ending on a number of executions instead of time
                    # Should be fine with storing state in this sample object since each sample has it's own unique
                    # timer thread
                    if not self.endts:
                        if self.executions >= int(self.end):
                            logger.info(
                                "End executions %d reached, ending generation of sample '%s'"
                                % (int(self.end), self.sample.name))
                            self.stopping = True
                            end = True
                    # NOTE(review): lt is only assigned inside the backfill loop or the regular
                    # generation pass; if the backfill loop ran zero times on the first pass,
                    # lt looks unbound here - confirm.
                    elif lt >= self.endts:
                        logger.info(
                            "End Time '%s' reached, ending generation of sample '%s'"
                            % (self.sample.endts, self.sample.name))
                        self.stopping = True
                        end = True

            else:
                # Countdown still running: sleep one tick and decrement.
                time.sleep(self.time)
                self.countdown -= self.time