Beispiel #1
0
 def replace_tokens(self,
                    eventsDict,
                    earliest,
                    latest,
                    ignore_tokens=False):
     """Replace the tokens of every event and return the resulting events.

     This will help calculations for event size when tokens are used.

     :param eventsDict: sequence of event dicts. Each one is read for the
         keys '_raw', 'host', 'index', 'source' and 'sourcetype' (and for
         the sample's timeField when that is not '_raw').
     :param earliest: lower time bound handed to the timestamp helpers and
         to every token.
     :param latest: upper time bound handed to the timestamp helpers and to
         every token.
     :param ignore_tokens: when true, neither the sample tokens nor the host
         token are applied; only the timestamp is computed.
     :return: list of new event dicts, each with an integer '_time' value.
     """
     send_events = []
     total_count = len(eventsDict)
     # The position of the event is passed to get_sequential_timestamp, so it
     # has to advance with every event (it used to stay at 0 forever).
     for eventcount, targetevent in enumerate(eventsDict):
         event = targetevent["_raw"]
         # Maintain state for every token in a given event, Hash contains keys for each file name which is
         # assigned a list of values picked from a random line in that file
         mvhash = {}
         host = targetevent['host']
         sequential = (getattr(self._sample, "sequentialTimestamp", False)
                       and self._sample.generator != 'perdayvolumegenerator')
         if sequential:
             pivot_timestamp = EventgenTimestamp.get_sequential_timestamp(
                 earliest, latest, eventcount, total_count)
         else:
             pivot_timestamp = EventgenTimestamp.get_random_timestamp(
                 earliest, latest)
         # Iterate tokens
         if not ignore_tokens:
             for token in self._sample.tokens:
                 token.mvhash = mvhash
                 event = token.replace(event,
                                       et=earliest,
                                       lt=latest,
                                       s=self._sample,
                                       pivot_timestamp=pivot_timestamp)
                 if token.replacementType == 'timestamp' and self._sample.timeField != '_raw':
                     # The timestamp lives in a separate field: reset the
                     # sample's timestamp and run the token over that field.
                     # The returned text is intentionally not stored.
                     self._sample.timestamp = None
                     token.replace(targetevent[self._sample.timeField],
                                   et=self._sample.earliestTime(),
                                   lt=self._sample.latestTime(),
                                   s=self._sample,
                                   pivot_timestamp=pivot_timestamp)
             host_token = self._sample.hostToken
             if host_token:
                 # clear the host mvhash every time, because we need to re-randomize it
                 host_token.mvhash = {}
                 host = host_token.replace(host, s=self._sample)
         try:
             time_val = int(time.mktime(pivot_timestamp.timetuple()))
         except Exception:
             # Fall back to the sample's current time when the pivot
             # timestamp cannot be converted to epoch seconds.
             time_val = int(time.mktime(self._sample.now().timetuple()))
         send_events.append({
             '_raw': event,
             'index': targetevent['index'],
             'host': host,
             'hostRegex': self._sample.hostRegex,
             'source': targetevent['source'],
             'sourcetype': targetevent['sourcetype'],
             '_time': time_val
         })
     return send_events
    def build_events(self, eventsDict, startTime, earliest, latest):
        """Replace tokens in each event, send it, then flush the output.

        :param eventsDict: sequence of event dicts. Each one is read for the
            keys '_raw', 'host', 'index', 'source' and 'sourcetype' (and for
            the sample's timeField when that is not '_raw').
        :param startTime: datetime at which generation started; only used to
            log the elapsed time.
        :param earliest: lower time bound for timestamps and tokens.
        :param latest: upper time bound for timestamps and tokens.
        :raises Exception: any error raised while processing an event or while
            flushing is logged and re-raised unchanged.
        """
        for targetevent in eventsDict:
            try:
                event = targetevent['_raw']
                if event == "\n":
                    # Events that are just a newline are not sent.
                    continue
                # Maintain state for every token in a given event, Hash contains keys for each file name which is
                # assigned a list of values picked from a random line in that file
                mvhash = {}
                pivot_timestamp = EventgenTimestamp.get_random_timestamp(earliest, latest, self._sample.earliest,
                                                                         self._sample.latest)
                # Iterate tokens
                for token in self._sample.tokens:
                    token.mvhash = mvhash
                    event = token.replace(event, et=earliest, lt=latest, s=self._sample,
                                          pivot_timestamp=pivot_timestamp)
                    if token.replacementType == 'timestamp' and self._sample.timeField != '_raw':
                        # The timestamp lives in a separate field: reset the sample's timestamp and run the
                        # token over that field. The returned text is intentionally not stored.
                        self._sample.timestamp = None
                        token.replace(targetevent[self._sample.timeField], et=self._sample.earliestTime(),
                                      lt=self._sample.latestTime(), s=self._sample, pivot_timestamp=pivot_timestamp)

                host = targetevent['host']
                host_token = self._sample.hostToken
                if host_token:
                    # clear the host mvhash every time, because we need to re-randomize it
                    host_token.mvhash = {}
                    host = host_token.replace(host, s=self._sample)

                try:
                    time_val = int(time.mktime(pivot_timestamp.timetuple()))
                except Exception:
                    # Fall back to the sample's current time when the pivot timestamp cannot be converted.
                    time_val = int(time.mktime(self._sample.now().timetuple()))

                self._out.bulksend([{'_raw': event,
                                     'index': targetevent['index'],
                                     'host': host,
                                     'hostRegex': self._sample.hostRegex,
                                     'source': targetevent['source'],
                                     'sourcetype': targetevent['sourcetype'],
                                     '_time': time_val}])
                self._sample.timestamp = None
            except Exception as e:
                self.logger.exception("Exception {} happened.".format(type(e)))
                # Bare raise keeps the original traceback intact.
                raise

        try:
            # TODO: Change this logic so that we don't lose all events if an exception is hit (try/except/break?)
            timeDiff = datetime.datetime.now() - startTime
            # timedelta.seconds alone drops whole days, so fold them back in.
            timeDiffFrac = "%d.%06d" % (timeDiff.days * 86400 + timeDiff.seconds, timeDiff.microseconds)
            self.logger.debugv("Interval complete, flushing feed")
            self._out.flush(endOfInterval=True)
            self.logger.debug("Generation of sample '%s' in app '%s' completed in %s seconds." % (
                self._sample.name, self._sample.app, timeDiffFrac))
        except Exception as e:
            self.logger.exception("Exception {} happened.".format(type(e)))
            raise
Beispiel #3
0
 def replace_tokens(self, eventsDict, earliest, latest, ignore_tokens=False):
     """Replace the tokens of every event and return the resulting events.

     This will help calculations for event size when tokens are used.

     :param eventsDict: sequence of event dicts. Each one is read for the
         keys '_raw', 'host', 'index', 'source' and 'sourcetype' (and for
         the sample's timeField when that is not '_raw').
     :param earliest: lower time bound handed to the timestamp helpers and
         to every token.
     :param latest: upper time bound handed to the timestamp helpers and to
         every token.
     :param ignore_tokens: when true, neither the sample tokens nor the host
         token are applied; only the timestamp is computed.
     :return: list of new event dicts, each with an integer '_time' value.
         The 'index' is a random pick from the sample's index_list when that
         list is non-empty, otherwise the event's own index.
     """
     send_events = []
     total_count = len(eventsDict)
     # The position of the event is passed to get_sequential_timestamp, so it
     # has to advance with every event (it used to stay at 0 forever).
     for eventcount, targetevent in enumerate(eventsDict):
         event = targetevent["_raw"]
         # Maintain state for every token in a given event, Hash contains keys for each file name which is
         # assigned a list of values picked from a random line in that file
         mvhash = {}
         host = targetevent['host']
         sequential = (getattr(self._sample, "sequentialTimestamp", False)
                       and self._sample.generator != 'perdayvolumegenerator')
         if sequential:
             pivot_timestamp = EventgenTimestamp.get_sequential_timestamp(earliest, latest, eventcount, total_count)
         else:
             pivot_timestamp = EventgenTimestamp.get_random_timestamp(earliest, latest)
         # Iterate tokens
         if not ignore_tokens:
             for token in self._sample.tokens:
                 token.mvhash = mvhash
                 event = token.replace(event, et=earliest, lt=latest, s=self._sample,
                                       pivot_timestamp=pivot_timestamp)
                 if token.replacementType == 'timestamp' and self._sample.timeField != '_raw':
                     # The timestamp lives in a separate field: reset the sample's timestamp and run the
                     # token over that field. The returned text is intentionally not stored.
                     self._sample.timestamp = None
                     token.replace(targetevent[self._sample.timeField], et=self._sample.earliestTime(),
                                   lt=self._sample.latestTime(), s=self._sample, pivot_timestamp=pivot_timestamp)
             host_token = self._sample.hostToken
             if host_token:
                 # clear the host mvhash every time, because we need to re-randomize it
                 host_token.mvhash = {}
                 host = host_token.replace(host, s=self._sample)
         try:
             time_val = int(time.mktime(pivot_timestamp.timetuple()))
         except Exception:
             # Fall back to the sample's current time when the pivot timestamp cannot be converted.
             time_val = int(time.mktime(self._sample.now().timetuple()))
         index_list = self._sample.index_list
         if len(index_list):
             index = random.choice(index_list)
         else:
             index = targetevent['index']
         send_events.append({
             '_raw': event,
             'index': index,
             'host': host,
             'hostRegex': self._sample.hostRegex,
             'source': targetevent['source'],
             'sourcetype': targetevent['sourcetype'],
             '_time': time_val,
         })
     return send_events
Beispiel #4
0
    def gen(self, count, earliest, latest, samplename=None):
        """Replay the loaded sample, spacing events by their own timestamps.

        Every line of the loaded sample is wrapped into an event dict, its
        timestamp is extracted, and the event is handed to
        ``set_time_and_send`` together with a running ``backfill_time`` that
        advances by the gap between consecutive event timestamps. Lines whose
        timestamp cannot be extracted are logged and skipped. The output is
        flushed once all lines are processed.

        :param count: accepted for interface compatibility; not used here.
        :param earliest: lower time bound forwarded to the helpers.
        :param latest: upper time bound forwarded to the helpers.
        :param samplename: accepted for interface compatibility; not used here.
        """
        # 9/8/15 CS Check to make sure we have events to replay
        self._sample.loadSample()
        previous_event = None
        previous_event_timestamp = None
        self.current_time = self._sample.now()

        # If backfill exists, calculate the start of the backfill time relative to the current time.
        # Otherwise, backfill time equals to the current time
        self.backfill_time = self._sample.get_backfill_time(self.current_time)

        if not self._sample.backfill or self._sample.backfilldone:
            self.backfill_time = EventgenTimestamp.get_random_timestamp_backfill(
                earliest, latest, self._sample.earliest, self._sample.latest)

        for line in self._sample.get_loaded_sample():
            # Add newline to a raw line if necessary
            try:
                # First treat the line as a dict-like record with a '_raw' key.
                if line['_raw'][-1] != '\n':
                    line['_raw'] += '\n'

                rpevent = {
                    '_raw': line['_raw'],
                    'index': line.get('index', self._sample.index),
                    'host': line.get('host', self._sample.host),
                    'hostRegex': line.get('hostRegex', self._sample.hostRegex),
                    'source': line.get('source', self._sample.source),
                    'sourcetype': line.get('sourcetype', self._sample.sourcetype)
                }
            except Exception:
                # Otherwise handle the line as plain text and take all
                # metadata from the sample. (Exception rather than a bare
                # except, so KeyboardInterrupt/SystemExit are not swallowed.)
                if line[-1] != '\n':
                    line += '\n'

                rpevent = {
                    '_raw': line,
                    'index': self._sample.index,
                    'host': self._sample.host,
                    'hostRegex': self._sample.hostRegex,
                    'source': self._sample.source,
                    'sourcetype': self._sample.sourcetype
                }

            # If timestamp doesn't exist, the sample file should be fixed to include timestamp for every event.
            try:
                current_event_timestamp = self._sample.getTSFromEvent(
                    rpevent[self._sample.timeField])
            except Exception:
                try:
                    current_event_timestamp = self._sample.getTSFromEvent(
                        line[self._sample.timeField])
                except Exception:
                    try:
                        self.logger.debug(
                            "Sample timeField {} failed to locate. Trying to locate _time field."
                            .format(self._sample.timeField))
                        current_event_timestamp = self._sample.getTSFromEvent(
                            line["_time"])
                    except Exception:
                        self.logger.exception(
                            "Extracting timestamp from an event failed.")
                        continue

            # Always flush the first event
            if previous_event is None:
                previous_event = rpevent
                previous_event_timestamp = current_event_timestamp
                self.set_time_and_send(rpevent, self.backfill_time, earliest,
                                       latest)
                continue

            # Refer to the last event to calculate the new backfill time
            time_difference = current_event_timestamp - previous_event_timestamp

            if self.backfill_time + time_difference >= self.current_time:
                # The replay has caught up with the current time; the branch
                # condition guarantees sleep_time is not negative.
                sleep_time = time_difference - (self.current_time -
                                                self.backfill_time)
                if self._sample.backfill and not self._sample.backfilldone:
                    # total_seconds() keeps whole days and fractions of a
                    # second, both of which timedelta.seconds discards.
                    time.sleep(sleep_time.total_seconds())
                self.current_time += sleep_time
                self.backfill_time = self.current_time
            else:
                self.backfill_time += time_difference
            previous_event = rpevent
            previous_event_timestamp = current_event_timestamp
            self.set_time_and_send(rpevent, self.backfill_time, earliest,
                                   latest)

        self._out.flush(endOfInterval=True)
        return
Beispiel #5
0
    def gen(self, count, earliest, latest, samplename=None):
        """Pick ``count`` events from the sample, replace tokens and send them.

        The events are chosen in one of three ways: randomly from the sample
        (``randomizeEvents``), as ``count`` full copies of the sample
        (``bundlelines``), or as the first ``count`` events, wrapping around
        the sample when it is shorter than ``count``. Each chosen event then
        gets its tokens replaced and is sent through the output, which is
        flushed at the end.

        An empty sample produces no events (with a warning) instead of
        looping forever or failing inside ``random.randint``.

        :param count: number of events to generate; -1 means "as many as the
            sample contains" in the random and default modes.
        :param earliest: lower time bound for timestamps and tokens.
        :param latest: upper time bound for timestamps and tokens.
        :param samplename: accepted for interface compatibility; not used here.
        :raises Exception: any error raised while processing an event or while
            flushing is logged and re-raised unchanged.
        """
        self.logger.debug(
            "Generating sample '%s' in app '%s' with count %d, et: '%s', lt '%s'"
            % (self._sample.name, self._sample.app, count, earliest, latest))
        startTime = datetime.datetime.now()

        # If we're random, fill random events from sampleDict into eventsDict
        if self._sample.randomizeEvents:
            eventsDict = []
            sdlen = len(self._sample.sampleDict)
            self.logger.debugv(
                "Random filling eventsDict for sample '%s' in app '%s' with %d events"
                % (self._sample.name, self._sample.app, count))
            # Count is -1, replay the whole file, but in randomizeEvents I think we'd want it to actually
            # just put as many events as there are in the file
            if count == -1:
                count = sdlen
            if not sdlen and count > 0:
                self.logger.warning(
                    "Sample '%s' in app '%s' is empty; no events to generate"
                    % (self._sample.name, self._sample.app))
            # Guard on sdlen: randint(0, -1) would raise on an empty sample.
            while sdlen and len(eventsDict) < count:
                eventsDict.append(self._sample.sampleDict[random.randint(
                    0, sdlen - 1)])

        # If we're bundlelines, create count copies of the sampleDict
        elif self._sample.bundlelines:
            eventsDict = []
            self.logger.debugv(
                "Bundlelines, filling eventsDict for sample '%s' in app '%s' with %d copies of sampleDict"
                % (self._sample.name, self._sample.app, count))
            for _ in range(count):
                eventsDict.extend(self._sample.sampleDict)

        # Otherwise fill count events into eventsDict or keep making copies of events out of sampleDict until
        # eventsDict is as big as count
        else:
            # If count is -1, play the whole file, else grab a subset
            if count == -1:
                count = len(self._sample.sampleDict)
            eventsDict = self._sample.sampleDict[0:count]

            # Continue to fill events array until len(events) == count
            if len(eventsDict) < count:
                self.logger.debugv(
                    "Events fill for sample '%s' in app '%s' less than count (%s vs. %s); continuing fill"
                    % (self._sample.name, self._sample.app, len(eventsDict),
                       count))
                self.logger.debugv("Current eventsDict: %s" % eventsDict)
                # run a modulus on the size of the eventdict to figure out what the last event was.  Populate to count
                # from there.
                sample_len = len(self._sample.sampleDict)
                if not sample_len:
                    # Nothing to copy from: without this guard the loop below
                    # would never terminate.
                    self.logger.warning(
                        "Sample '%s' in app '%s' is empty; no events to generate"
                        % (self._sample.name, self._sample.app))
                while sample_len and len(eventsDict) < count:
                    nextEventToUse = self._sample.sampleDict[
                        len(eventsDict) % sample_len]
                    self.logger.debugv("Next event to add: %s" %
                                       nextEventToUse)
                    eventsDict.append(nextEventToUse)
                self.logger.debugv(
                    "Events fill complete for sample '%s' in app '%s' length %d"
                    % (self._sample.name, self._sample.app, len(eventsDict)))

        eventcount = 0
        for targetevent in eventsDict:
            try:
                event = targetevent['_raw']
                if event == "\n":
                    # Events that are just a newline are not sent.
                    continue

                # Maintain state for every token in a given event
                # Hash contains keys for each file name which is assigned a list of values
                # picked from a random line in that file
                mvhash = {}

                pivot_timestamp = EventgenTimestamp.get_random_timestamp(
                    earliest, latest, self._sample.earliest,
                    self._sample.latest)

                # Iterate tokens
                for token in self._sample.tokens:
                    token.mvhash = mvhash
                    self.logger.debugv(
                        "Sending event to token replacement: Event:{0} Token:{1}"
                        .format(event, token))
                    event = token.replace(event,
                                          et=earliest,
                                          lt=latest,
                                          s=self._sample,
                                          pivot_timestamp=pivot_timestamp)
                    self.logger.debugv("finished replacing token")
                    if token.replacementType == 'timestamp' and self._sample.timeField != '_raw':
                        # The timestamp lives in a separate field: reset the
                        # sample's timestamp and run the token over that
                        # field. The returned text is intentionally not stored.
                        self._sample.timestamp = None
                        token.replace(targetevent[self._sample.timeField],
                                      et=self._sample.earliestTime(),
                                      lt=self._sample.latestTime(),
                                      s=self._sample,
                                      pivot_timestamp=pivot_timestamp)

                host = targetevent['host']
                host_token = self._sample.hostToken
                if host_token:
                    # clear the host mvhash every time, because we need to re-randomize it
                    host_token.mvhash = {}
                    host = host_token.replace(host, s=self._sample)

                try:
                    time_val = int(time.mktime(pivot_timestamp.timetuple()))
                except Exception:
                    # Fall back to the sample's current time when the pivot
                    # timestamp cannot be converted.
                    time_val = int(time.mktime(self._sample.now().timetuple()))

                outgoing = [{
                    '_raw': event,
                    'index': targetevent['index'],
                    'host': host,
                    'hostRegex': self._sample.hostRegex,
                    'source': targetevent['source'],
                    'sourcetype': targetevent['sourcetype'],
                    '_time': time_val
                }]
                self.logger.debugv("Finished Processing event: %s" %
                                   eventcount)
                eventcount += 1
                self._out.bulksend(outgoing)
                self._sample.timestamp = None
            except Exception as e:
                self.logger.exception("Exception {} happened.".format(type(e)))
                # Bare raise keeps the original traceback intact.
                raise

        try:
            timeDiff = datetime.datetime.now() - startTime
            # timedelta.seconds alone drops whole days, so fold them back in.
            timeDiffFrac = "%d.%06d" % (
                timeDiff.days * 86400 + timeDiff.seconds,
                timeDiff.microseconds)
            self.logger.debugv("Interval complete, flushing feed")
            self._out.flush(endOfInterval=True)
            self.logger.debug(
                "Generation of sample '%s' in app '%s' completed in %s seconds."
                % (self._sample.name, self._sample.app, timeDiffFrac))
        except Exception as e:
            self.logger.exception("Exception {} happened.".format(type(e)))
            raise
Beispiel #6
0
    def gen(self, count, earliest, latest, samplename=None):
        """Replay the loaded sample, spacing events by their scaled timestamps.

        Every line of the loaded sample is wrapped into an event dict, its
        timestamp is extracted, and the event is handed to
        ``set_time_and_send`` together with a running ``backfill_time``. That
        time advances by the gap between consecutive event timestamps
        multiplied by the sample's ``timeMultiple``. Lines whose timestamp
        cannot be extracted are logged and skipped. The output is flushed once
        all lines are processed.

        :param count: accepted for interface compatibility; not used here.
        :param earliest: lower time bound forwarded to the helpers.
        :param latest: upper time bound forwarded to the helpers.
        :param samplename: accepted for interface compatibility; not used here.
        """
        # 9/8/15 CS Check to make sure we have events to replay
        self._sample.loadSample()
        previous_event = None
        previous_event_timestamp = None
        self.current_time = self._sample.now()

        # If backfill exists, calculate the start of the backfill time relative to the current time.
        # Otherwise, backfill time equals to the current time
        self.backfill_time = self._sample.get_backfill_time(self.current_time)

        if not self._sample.backfill or self._sample.backfilldone:
            self.backfill_time = EventgenTimestamp.get_random_timestamp_backfill(
                earliest, latest, self._sample.earliest, self._sample.latest)

        for line in self._sample.get_loaded_sample():
            # Add newline to a raw line if necessary
            try:
                # First treat the line as a dict-like record with a '_raw' key.
                if line['_raw'][-1] != '\n':
                    line['_raw'] += '\n'

                rpevent = {
                    '_raw': line['_raw'],
                    'index': line.get('index', self._sample.index),
                    'host': line.get('host', self._sample.host),
                    'hostRegex': line.get('hostRegex', self._sample.hostRegex),
                    'source': line.get('source', self._sample.source),
                    'sourcetype': line.get('sourcetype', self._sample.sourcetype)}
            except Exception:
                # Otherwise handle the line as plain text and take all metadata from the sample.
                # (Exception rather than a bare except, so KeyboardInterrupt/SystemExit are not swallowed.)
                if line[-1] != '\n':
                    line += '\n'

                rpevent = {
                    '_raw': line, 'index': self._sample.index, 'host': self._sample.host, 'hostRegex':
                    self._sample.hostRegex, 'source': self._sample.source, 'sourcetype': self._sample.sourcetype}

            # If timestamp doesn't exist, the sample file should be fixed to include timestamp for every event.
            try:
                current_event_timestamp = self._sample.getTSFromEvent(rpevent[self._sample.timeField])
            except Exception:
                try:
                    current_event_timestamp = self._sample.getTSFromEvent(line[self._sample.timeField])
                except Exception:
                    try:
                        self.logger.debug("Sample timeField {} failed to locate. Trying to locate _time field.".format(
                            self._sample.timeField))
                        current_event_timestamp = self._sample.getTSFromEvent(line["_time"])
                    except Exception:
                        self.logger.exception("Extracting timestamp from an event failed.")
                        continue

            # Always flush the first event
            if previous_event is None:
                previous_event = rpevent
                previous_event_timestamp = current_event_timestamp
                self.set_time_and_send(rpevent, self.backfill_time, earliest, latest)
                continue

            # Refer to the last event to calculate the new backfill time
            event_gap = current_event_timestamp - previous_event_timestamp
            time_difference = datetime.timedelta(seconds=event_gap.total_seconds() * self._sample.timeMultiple)

            if self.backfill_time + time_difference >= self.current_time:
                # The replay has caught up with the current time; the branch condition guarantees
                # sleep_time is not negative.
                sleep_time = time_difference - (self.current_time - self.backfill_time)
                if not self._sample.backfill or self._sample.backfilldone:
                    # total_seconds() keeps whole days and fractions of a second, both of which
                    # timedelta.seconds discards.
                    time.sleep(sleep_time.total_seconds())
                self.current_time += sleep_time
                self.backfill_time = self.current_time
            else:
                self.backfill_time += time_difference
            previous_event = rpevent
            previous_event_timestamp = current_event_timestamp
            self.set_time_and_send(rpevent, self.backfill_time, earliest, latest)

        self._out.flush(endOfInterval=True)
        return