def replace_tokens(self, eventsDict, earliest, latest, ignore_tokens=False):
    """Apply the sample's tokens to every event and return the rendered events.

    Nothing is sent from here: the finished events are handed back to the
    caller, which helps calculations for event size when tokens are used.

    :param eventsDict: sized sequence of event dicts; each one is read for
        ``_raw``, ``host``, ``index``, ``source`` and ``sourcetype``.
    :param earliest: earliest time, forwarded to the timestamp helper and to
        every token.
    :param latest: latest time, forwarded the same way.
    :param ignore_tokens: when true, ``_raw`` is passed through without token
        replacement; a configured host token is still applied.
    :return: list with one dict per input event, carrying the keys ``_raw``,
        ``index``, ``host``, ``hostRegex``, ``source``, ``sourcetype`` and
        ``_time`` (integer epoch seconds).
    """
    sample = self._sample
    send_events = []
    total_count = len(eventsDict)
    # enumerate() supplies the event's position within the batch. The counter
    # used to stay at 0 for the whole loop, so every event requested the same
    # sequential slot.
    for eventcount, targetevent in enumerate(eventsDict):
        event = targetevent["_raw"]
        # Maintain state for every token in a given event, Hash contains keys for each file name which is
        # assigned a list of values picked from a random line in that file
        mvhash = {}
        host = targetevent['host']
        if hasattr(sample, "sequentialTimestamp") and sample.sequentialTimestamp and \
                sample.generator != 'perdayvolumegenerator':
            pivot_timestamp = EventgenTimestamp.get_sequential_timestamp(
                earliest, latest, eventcount, total_count)
        else:
            pivot_timestamp = EventgenTimestamp.get_random_timestamp(earliest, latest)
        # Iterate tokens
        if not ignore_tokens:
            for token in sample.tokens:
                token.mvhash = mvhash
                event = token.replace(event, et=earliest, lt=latest, s=sample,
                                      pivot_timestamp=pivot_timestamp)
                if token.replacementType == 'timestamp' and sample.timeField != '_raw':
                    sample.timestamp = None
                    # The result is discarded here; presumably the call matters for
                    # its effect on the sample's timestamp state -- TODO confirm.
                    token.replace(targetevent[sample.timeField], et=sample.earliestTime(),
                                  lt=sample.latestTime(), s=sample, pivot_timestamp=pivot_timestamp)
            if sample.hostToken:
                # clear the host mvhash every time, because we need to re-randomize it
                sample.hostToken.mvhash = {}
        if sample.hostToken:
            host = sample.hostToken.replace(host, s=sample)
        try:
            time_val = int(time.mktime(pivot_timestamp.timetuple()))
        except Exception:
            # The pivot could not be converted; fall back to the sample's current time.
            time_val = int(time.mktime(sample.now().timetuple()))
        send_events.append({
            '_raw': event,
            'index': targetevent['index'],
            'host': host,
            'hostRegex': sample.hostRegex,
            'source': targetevent['source'],
            'sourcetype': targetevent['sourcetype'],
            '_time': time_val,
        })
    return send_events
def build_events(self, eventsDict, startTime, earliest, latest):
    """Render each event, send it to the output, then flush and log the duration.

    Events whose ``_raw`` is a lone newline are skipped. Every other event
    gets a random pivot timestamp, has the sample's tokens (and host token,
    if configured) applied, and is sent on its own through
    ``self._out.bulksend``.

    :param eventsDict: iterable of event dicts; each one is read for ``_raw``,
        ``host``, ``index``, ``source`` and ``sourcetype``.
    :param startTime: ``datetime`` taken when generation started; only used
        to report the elapsed time.
    :param earliest: earliest time, forwarded to the timestamp helper and tokens.
    :param latest: latest time, forwarded the same way.
    :raises Exception: any error raised while processing an event or while
        flushing is logged and then re-raised unchanged.
    """
    sample = self._sample
    for targetevent in eventsDict:
        try:
            event = targetevent['_raw']
            if event == "\n":
                continue
            # Maintain state for every token in a given event, Hash contains keys for each file name which is
            # assigned a list of values picked from a random line in that file
            mvhash = {}
            pivot_timestamp = EventgenTimestamp.get_random_timestamp(
                earliest, latest, sample.earliest, sample.latest)
            # Iterate tokens
            for token in sample.tokens:
                token.mvhash = mvhash
                event = token.replace(event, et=earliest, lt=latest, s=sample,
                                      pivot_timestamp=pivot_timestamp)
                if token.replacementType == 'timestamp' and sample.timeField != '_raw':
                    sample.timestamp = None
                    # The result is discarded here; presumably the call matters for
                    # its effect on the sample's timestamp state -- TODO confirm.
                    token.replace(targetevent[sample.timeField], et=sample.earliestTime(),
                                  lt=sample.latestTime(), s=sample, pivot_timestamp=pivot_timestamp)
            host = targetevent['host']
            if sample.hostToken:
                # clear the host mvhash every time, because we need to re-randomize it
                sample.hostToken.mvhash = {}
                host = sample.hostToken.replace(host, s=sample)
            try:
                time_val = int(time.mktime(pivot_timestamp.timetuple()))
            except Exception:
                # The pivot could not be converted; fall back to the sample's current time.
                time_val = int(time.mktime(sample.now().timetuple()))
            self._out.bulksend([{
                '_raw': event,
                'index': targetevent['index'],
                'host': host,
                'hostRegex': sample.hostRegex,
                'source': targetevent['source'],
                'sourcetype': targetevent['sourcetype'],
                '_time': time_val,
            }])
            sample.timestamp = None
        except Exception as e:
            self.logger.exception("Exception {} happened.".format(type(e)))
            # Bare raise keeps the original traceback intact.
            raise

    try:
        # TODO: Change this logic so that we don't lose all events if an exception is hit (try/except/break?)
        timeDiff = datetime.datetime.now() - startTime
        # timedelta.seconds alone drops whole days, so fold the days back in.
        timeDiffFrac = "%d.%06d" % (timeDiff.days * 86400 + timeDiff.seconds, timeDiff.microseconds)
        self.logger.debugv("Interval complete, flushing feed")
        self._out.flush(endOfInterval=True)
        self.logger.debug("Generation of sample '%s' in app '%s' completed in %s seconds." % (
            sample.name, sample.app, timeDiffFrac))
    except Exception as e:
        self.logger.exception("Exception {} happened.".format(type(e)))
        raise
def replace_tokens(self, eventsDict, earliest, latest, ignore_tokens=False):
    """Apply the sample's tokens to every event and return the rendered events.

    Nothing is sent from here: the finished events are handed back to the
    caller, which helps calculations for event size when tokens are used.

    :param eventsDict: sized sequence of event dicts; each one is read for
        ``_raw``, ``host``, ``index``, ``source`` and ``sourcetype``.
    :param earliest: earliest time, forwarded to the timestamp helper and to
        every token.
    :param latest: latest time, forwarded the same way.
    :param ignore_tokens: when true, ``_raw`` is passed through without token
        replacement; a configured host token is still applied.
    :return: list with one dict per input event, carrying the keys ``_raw``,
        ``index``, ``host``, ``hostRegex``, ``source``, ``sourcetype`` and
        ``_time`` (integer epoch seconds). ``index`` is drawn at random from
        the sample's ``index_list`` when that list is non-empty, otherwise it
        is the event's own ``index``.
    """
    sample = self._sample
    send_events = []
    total_count = len(eventsDict)
    # enumerate() supplies the event's position within the batch. The counter
    # used to stay at 0 for the whole loop, so every event requested the same
    # sequential slot.
    for eventcount, targetevent in enumerate(eventsDict):
        event = targetevent["_raw"]
        # Maintain state for every token in a given event, Hash contains keys for each file name which is
        # assigned a list of values picked from a random line in that file
        mvhash = {}
        host = targetevent['host']
        if hasattr(sample, "sequentialTimestamp") and sample.sequentialTimestamp and \
                sample.generator != 'perdayvolumegenerator':
            pivot_timestamp = EventgenTimestamp.get_sequential_timestamp(
                earliest, latest, eventcount, total_count)
        else:
            pivot_timestamp = EventgenTimestamp.get_random_timestamp(earliest, latest)
        # Iterate tokens
        if not ignore_tokens:
            for token in sample.tokens:
                token.mvhash = mvhash
                event = token.replace(event, et=earliest, lt=latest, s=sample,
                                      pivot_timestamp=pivot_timestamp)
                if token.replacementType == 'timestamp' and sample.timeField != '_raw':
                    sample.timestamp = None
                    # The result is discarded here; presumably the call matters for
                    # its effect on the sample's timestamp state -- TODO confirm.
                    token.replace(targetevent[sample.timeField], et=sample.earliestTime(),
                                  lt=sample.latestTime(), s=sample, pivot_timestamp=pivot_timestamp)
            if sample.hostToken:
                # clear the host mvhash every time, because we need to re-randomize it
                sample.hostToken.mvhash = {}
        if sample.hostToken:
            host = sample.hostToken.replace(host, s=sample)
        try:
            time_val = int(time.mktime(pivot_timestamp.timetuple()))
        except Exception:
            # The pivot could not be converted; fall back to the sample's current time.
            time_val = int(time.mktime(sample.now().timetuple()))
        # The index is picked per event, so one batch may be spread over several indexes.
        index_list = sample.index_list
        send_events.append({
            '_raw': event,
            'index': random.choice(index_list) if len(index_list) else targetevent['index'],
            'host': host,
            'hostRegex': sample.hostRegex,
            'source': targetevent['source'],
            'sourcetype': targetevent['sourcetype'],
            '_time': time_val,
        })
    return send_events
def gen(self, count, earliest, latest, samplename=None):
    """Replay the loaded sample, keeping the spacing between event timestamps.

    Each record is normalised into an event dict, its timestamp is extracted,
    and the event is sent through ``self.set_time_and_send`` with a replay
    time that moves forward by the gap to the previous event. The output is
    flushed once every record has been handled.

    :param count: not used by this generator.
    :param earliest: earliest time, forwarded to the timestamp helper and to
        ``set_time_and_send``.
    :param latest: latest time, forwarded the same way.
    :param samplename: not used by this generator.
    :return: ``None``.
    """
    sample = self._sample
    # 9/8/15 CS Check to make sure we have events to replay
    sample.loadSample()
    previous_event = None
    previous_event_timestamp = None
    self.current_time = sample.now()

    # If backfill exists, calculate the start of the backfill time relative to the current time.
    # Otherwise, backfill time equals to the current time
    self.backfill_time = sample.get_backfill_time(self.current_time)

    if not sample.backfill or sample.backfilldone:
        self.backfill_time = EventgenTimestamp.get_random_timestamp_backfill(
            earliest, latest, sample.earliest, sample.latest)

    for line in sample.get_loaded_sample():
        # Add newline to a raw line if necessary
        try:
            if line['_raw'][-1] != '\n':
                line['_raw'] += '\n'
            rpevent = {
                '_raw': line['_raw'],
                'index': line.get('index', sample.index),
                'host': line.get('host', sample.host),
                'hostRegex': line.get('hostRegex', sample.hostRegex),
                'source': line.get('source', sample.source),
                'sourcetype': line.get('sourcetype', sample.sourcetype),
            }
        except Exception:
            # The record could not be read as a dict with '_raw': treat it as raw
            # text and take the metadata from the sample. Catching Exception rather
            # than everything lets KeyboardInterrupt/SystemExit propagate.
            if line[-1] != '\n':
                line += '\n'
            rpevent = {
                '_raw': line,
                'index': sample.index,
                'host': sample.host,
                'hostRegex': sample.hostRegex,
                'source': sample.source,
                'sourcetype': sample.sourcetype,
            }

        # If timestamp doesn't exist, the sample file should be fixed to include timestamp for every event.
        try:
            current_event_timestamp = sample.getTSFromEvent(rpevent[sample.timeField])
        except Exception:
            try:
                current_event_timestamp = sample.getTSFromEvent(line[sample.timeField])
            except Exception:
                try:
                    self.logger.debug(
                        "Sample timeField {} failed to locate. Trying to locate _time field."
                        .format(sample.timeField))
                    current_event_timestamp = sample.getTSFromEvent(line["_time"])
                except Exception:
                    self.logger.exception("Extracting timestamp from an event failed.")
                    continue

        # Always flush the first event
        if previous_event is None:
            previous_event = rpevent
            previous_event_timestamp = current_event_timestamp
            self.set_time_and_send(rpevent, self.backfill_time, earliest, latest)
            continue

        # Refer to the last event to calculate the new backfill time
        time_difference = current_event_timestamp - previous_event_timestamp

        if self.backfill_time + time_difference >= self.current_time:
            # The replay clock has caught up with the current time. sleep_time is
            # non-negative here because of the comparison above.
            sleep_time = time_difference - (self.current_time - self.backfill_time)
            # NOTE(review): sleeping only while a backfill is still pending looks
            # inverted compared with the other replay variant in this file -- confirm.
            if sample.backfill and not sample.backfilldone:
                # total_seconds() waits for the whole gap; .seconds would drop whole
                # days and fractions of a second although current_time below is
                # advanced by the full sleep_time.
                time.sleep(sleep_time.total_seconds())
            self.current_time += sleep_time
            self.backfill_time = self.current_time
        else:
            self.backfill_time += time_difference
        previous_event = rpevent
        previous_event_timestamp = current_event_timestamp
        self.set_time_and_send(rpevent, self.backfill_time, earliest, latest)

    self._out.flush(endOfInterval=True)
    return
def gen(self, count, earliest, latest, samplename=None):
    """Build ``count`` events from the sample, render them and send them.

    The working list of events is filled in one of three ways:

    * ``randomizeEvents``: ``count`` events picked at random from ``sampleDict``;
    * ``bundlelines``: ``count`` back-to-back copies of ``sampleDict``;
    * otherwise: the first ``count`` events, cycling through ``sampleDict``
      again when it holds fewer than ``count`` events.

    In the first and third mode a ``count`` of -1 means "as many events as
    ``sampleDict`` holds". Every event except a lone newline then gets a
    random pivot timestamp, has the tokens (and host token, if configured)
    applied, and is sent on its own through ``self._out.bulksend``. Finally
    the output is flushed and the elapsed time is logged.

    :param count: number of events (or bundles) to generate, or -1.
    :param earliest: earliest time, forwarded to the timestamp helper and tokens.
    :param latest: latest time, forwarded the same way.
    :param samplename: not used by this generator.
    :raises Exception: any error raised while processing an event or while
        flushing is logged and then re-raised unchanged.
    """
    sample = self._sample
    self.logger.debug(
        "Generating sample '%s' in app '%s' with count %d, et: '%s', lt '%s'" %
        (sample.name, sample.app, count, earliest, latest))
    startTime = datetime.datetime.now()

    # If we're random, fill random events from sampleDict into eventsDict
    if sample.randomizeEvents:
        eventsDict = []
        sdlen = len(sample.sampleDict)
        self.logger.debugv(
            "Random filling eventsDict for sample '%s' in app '%s' with %d events" %
            (sample.name, sample.app, count))
        # Count is -1, replay the whole file, but in randomizeEvents I think we'd want it to actually
        # just put as many events as there are in the file
        if count == -1:
            count = sdlen
        while len(eventsDict) < count:
            eventsDict.append(sample.sampleDict[random.randint(0, sdlen - 1)])
    # If we're bundlelines, create count copies of the sampleDict
    elif sample.bundlelines:
        self.logger.debugv(
            "Bundlelines, filling eventsDict for sample '%s' in app '%s' with %d copies of sampleDict" %
            (sample.name, sample.app, count))
        # List repetition gives the same result as extending ``count`` times
        # (nothing for a non-positive count) and does not rely on xrange.
        eventsDict = list(sample.sampleDict) * count
    # Otherwise fill count events into eventsDict or keep making copies of events out of sampleDict until
    # eventsDict is as big as count
    else:
        # If count is -1, play the whole file, else grab a subset
        if count == -1:
            count = len(sample.sampleDict)
        # The slice is a new list, so appending below leaves sampleDict untouched.
        eventsDict = sample.sampleDict[0:count]

        # Continue to fill events array until len(events) == count
        if len(eventsDict) < count:
            self.logger.debugv(
                "Events fill for sample '%s' in app '%s' less than count (%s vs. %s); continuing fill" %
                (sample.name, sample.app, len(eventsDict), count))
            self.logger.debugv("Current eventsDict: %s" % eventsDict)
            # run a modulus on the size of the eventdict to figure out what the last event was. Populate to count
            # from there.
            source_len = len(sample.sampleDict)
            # With an empty sampleDict nothing can ever be appended; skipping the
            # loop avoids spinning forever waiting for eventsDict to grow.
            if source_len:
                while len(eventsDict) < count:
                    nextEventToUse = sample.sampleDict[len(eventsDict) % source_len]
                    self.logger.debugv("Next event to add: %s" % nextEventToUse)
                    eventsDict.append(nextEventToUse)
            self.logger.debugv(
                "Events fill complete for sample '%s' in app '%s' length %d" %
                (sample.name, sample.app, len(eventsDict)))

    eventcount = 0
    for targetevent in eventsDict:
        try:
            event = targetevent['_raw']
            if event == "\n":
                continue
            # Maintain state for every token in a given event
            # Hash contains keys for each file name which is assigned a list of values
            # picked from a random line in that file
            mvhash = {}
            pivot_timestamp = EventgenTimestamp.get_random_timestamp(
                earliest, latest, sample.earliest, sample.latest)
            # Iterate tokens
            for token in sample.tokens:
                token.mvhash = mvhash
                self.logger.debugv(
                    "Sending event to token replacement: Event:{0} Token:{1}".format(event, token))
                event = token.replace(event, et=earliest, lt=latest, s=sample,
                                      pivot_timestamp=pivot_timestamp)
                self.logger.debugv("finished replacing token")
                if token.replacementType == 'timestamp' and sample.timeField != '_raw':
                    sample.timestamp = None
                    # The result is discarded here; presumably the call matters for
                    # its effect on the sample's timestamp state -- TODO confirm.
                    token.replace(targetevent[sample.timeField], et=sample.earliestTime(),
                                  lt=sample.latestTime(), s=sample, pivot_timestamp=pivot_timestamp)
            host = targetevent['host']
            if sample.hostToken:
                # clear the host mvhash every time, because we need to re-randomize it
                sample.hostToken.mvhash = {}
                host = sample.hostToken.replace(host, s=sample)
            try:
                time_val = int(time.mktime(pivot_timestamp.timetuple()))
            except Exception:
                # The pivot could not be converted; fall back to the sample's current time.
                time_val = int(time.mktime(sample.now().timetuple()))
            rendered = [{
                '_raw': event,
                'index': targetevent['index'],
                'host': host,
                'hostRegex': sample.hostRegex,
                'source': targetevent['source'],
                'sourcetype': targetevent['sourcetype'],
                '_time': time_val,
            }]
            self.logger.debugv("Finished Processing event: %s" % eventcount)
            eventcount += 1
            self._out.bulksend(rendered)
            sample.timestamp = None
        except Exception as e:
            self.logger.exception("Exception {} happened.".format(type(e)))
            # Bare raise keeps the original traceback intact.
            raise

    try:
        timeDiff = datetime.datetime.now() - startTime
        # timedelta.seconds alone drops whole days, so fold the days back in.
        timeDiffFrac = "%d.%06d" % (timeDiff.days * 86400 + timeDiff.seconds, timeDiff.microseconds)
        self.logger.debugv("Interval complete, flushing feed")
        self._out.flush(endOfInterval=True)
        self.logger.debug(
            "Generation of sample '%s' in app '%s' completed in %s seconds." %
            (sample.name, sample.app, timeDiffFrac))
    except Exception as e:
        self.logger.exception("Exception {} happened.".format(type(e)))
        raise
def gen(self, count, earliest, latest, samplename=None):
    """Replay the loaded sample, keeping the scaled spacing between events.

    Each record is normalised into an event dict, its timestamp is extracted,
    and the event is sent through ``self.set_time_and_send`` with a replay
    time that moves forward by the gap to the previous event multiplied by
    the sample's ``timeMultiple``. The output is flushed once every record
    has been handled.

    :param count: not used by this generator.
    :param earliest: earliest time, forwarded to the timestamp helper and to
        ``set_time_and_send``.
    :param latest: latest time, forwarded the same way.
    :param samplename: not used by this generator.
    :return: ``None``.
    """
    sample = self._sample
    # 9/8/15 CS Check to make sure we have events to replay
    sample.loadSample()
    previous_event = None
    previous_event_timestamp = None
    self.current_time = sample.now()

    # If backfill exists, calculate the start of the backfill time relative to the current time.
    # Otherwise, backfill time equals to the current time
    self.backfill_time = sample.get_backfill_time(self.current_time)

    if not sample.backfill or sample.backfilldone:
        self.backfill_time = EventgenTimestamp.get_random_timestamp_backfill(
            earliest, latest, sample.earliest, sample.latest)

    for line in sample.get_loaded_sample():
        # Add newline to a raw line if necessary
        try:
            if line['_raw'][-1] != '\n':
                line['_raw'] += '\n'
            rpevent = {
                '_raw': line['_raw'],
                'index': line.get('index', sample.index),
                'host': line.get('host', sample.host),
                'hostRegex': line.get('hostRegex', sample.hostRegex),
                'source': line.get('source', sample.source),
                'sourcetype': line.get('sourcetype', sample.sourcetype),
            }
        except Exception:
            # The record could not be read as a dict with '_raw': treat it as raw
            # text and take the metadata from the sample. Catching Exception rather
            # than everything lets KeyboardInterrupt/SystemExit propagate.
            if line[-1] != '\n':
                line += '\n'
            rpevent = {
                '_raw': line,
                'index': sample.index,
                'host': sample.host,
                'hostRegex': sample.hostRegex,
                'source': sample.source,
                'sourcetype': sample.sourcetype,
            }

        # If timestamp doesn't exist, the sample file should be fixed to include timestamp for every event.
        try:
            current_event_timestamp = sample.getTSFromEvent(rpevent[sample.timeField])
        except Exception:
            try:
                current_event_timestamp = sample.getTSFromEvent(line[sample.timeField])
            except Exception:
                try:
                    self.logger.debug(
                        "Sample timeField {} failed to locate. Trying to locate _time field.".format(
                            sample.timeField))
                    current_event_timestamp = sample.getTSFromEvent(line["_time"])
                except Exception:
                    self.logger.exception("Extracting timestamp from an event failed.")
                    continue

        # Always flush the first event
        if previous_event is None:
            previous_event = rpevent
            previous_event_timestamp = current_event_timestamp
            self.set_time_and_send(rpevent, self.backfill_time, earliest, latest)
            continue

        # Refer to the last event to calculate the new backfill time
        original_gap = current_event_timestamp - previous_event_timestamp
        time_difference = datetime.timedelta(seconds=original_gap.total_seconds() * sample.timeMultiple)

        if self.backfill_time + time_difference >= self.current_time:
            # The replay clock has caught up with the current time. sleep_time is
            # non-negative here because of the comparison above.
            sleep_time = time_difference - (self.current_time - self.backfill_time)
            if not sample.backfill or sample.backfilldone:
                # total_seconds() waits for the whole gap; .seconds would drop whole
                # days and fractions of a second although current_time below is
                # advanced by the full sleep_time.
                time.sleep(sleep_time.total_seconds())
            self.current_time += sleep_time
            self.backfill_time = self.current_time
        else:
            self.backfill_time += time_difference
        previous_event = rpevent
        previous_event_timestamp = current_event_timestamp
        self.set_time_and_send(rpevent, self.backfill_time, earliest, latest)

    self._out.flush(endOfInterval=True)
    return