def build_events(self, eventsDict, startTime, earliest, latest, ignore_tokens=False):
    """Ready events for output by replacing tokens and updating the output queue.

    :param eventsDict: raw events to process
    :param startTime: datetime when this generation interval began (used for timing stats)
    :param earliest: earliest timestamp bound for token replacement
    :param latest: latest timestamp bound for token replacement
    :param ignore_tokens: when True, skip token substitution
    :raises Exception: re-raises anything thrown by the output queue or flush
    """
    # Replace tokens first so that perDayVolume evaluates the correct event length
    send_objects = self.replace_tokens(eventsDict, earliest, latest, ignore_tokens=ignore_tokens)
    try:
        self._out.bulksend(send_objects)
        self._sample.timestamp = None
    except Exception as e:
        logger.exception("Exception {} happened.".format(type(e)))
        # Bare raise preserves the original traceback instead of re-raising from here
        raise
    try:
        # TODO: Change this logic so that we don't lose all events if an exception is hit (try/except/break?)
        endTime = datetime.datetime.now()
        timeDiff = endTime - startTime
        timeDiffFrac = "%d.%06d" % (timeDiff.seconds, timeDiff.microseconds)
        logger.debug("Interval complete, flushing feed")
        self._out.flush(endOfInterval=True)
        logger.debug("Generation of sample '%s' in app '%s' completed in %s seconds." %
                     (self._sample.name, self._sample.app, timeDiffFrac))
    except Exception as e:
        logger.exception("Exception {} happened.".format(type(e)))
        raise
def _sendHTTPEvents(self, events):
    """Reformat events to the SCS ingest spec and POST them in size-capped batches.

    Events are mutated in place: the Splunk-internal fields (_raw, _time,
    index, hostRegex) are mapped onto the SCS body/timestamp/attributes
    layout. A batch is flushed whenever adding the next event would reach
    scsHttpPayloadMax bytes; in-flight requests are collected in
    self.active_sessions.

    :param events: list of event dicts to send
    :raises Exception: re-raises anything thrown while batching or posting
    """
    currentPayloadSize = 0
    currentPayload = []
    try:
        for event in events:
            # Reformat the event to fit the scs request spec
            # TODO: Move this logic to generator
            try:
                event["body"] = event.pop("_raw")
                event["timestamp"] = int(event.pop("_time") * 1000)
                event.pop("index")
                if "attributes" not in event:
                    event["attributes"] = {}
                # hostRegex is optional; only map it when present instead of
                # relying on an exception for control flow.
                if "hostRegex" in event:
                    event["attributes"]["hostRegex"] = event.pop("hostRegex")
            except KeyError:
                # Event lacks the Splunk-internal fields (already SCS-shaped);
                # send it as-is. Narrowed from a bare except so unrelated
                # errors are no longer silently swallowed.
                pass
            targetline = json.dumps(event)
            targetlinesize = len(targetline)
            # Continue building a current payload if the payload is less than the max size
            if (currentPayloadSize + targetlinesize) < self.scsHttpPayloadMax:
                currentPayload.append(event)
                currentPayloadSize += targetlinesize
            else:
                self.active_sessions.append(
                    self.session.post(url=self.scsEndPoint, data=json.dumps(currentPayload),
                                      headers=self.header, verify=False))
                currentPayloadSize = targetlinesize
                currentPayload = [event]
        # Final flush of the leftover events
        if currentPayloadSize > 0:
            self.active_sessions.append(
                self.session.post(url=self.scsEndPoint, data=json.dumps(currentPayload),
                                  headers=self.header, verify=False))
    except Exception as e:
        logger.exception(str(e))
        raise
def updateConfig(self, config):
    """Merge global/sample httpevent settings into this plugin and build the connection pool.

    Resolution order for each setting is global config first, then the
    sample, then a hard-coded default ('roundrobin' output mode, 10000-byte
    max payload size).

    :param config: the global eventgen config object
    :raises NoServers: when httpeventServers is configured neither globally nor on the sample
    """
    OutputPlugin.updateConfig(self, config)
    try:
        if not hasattr(self.config, 'httpeventServers'):
            if hasattr(self._sample, 'httpeventServers'):
                self.config.httpeventServers = self._sample.httpeventServers
            else:
                logger.error('outputMode %s but httpeventServers not specified for sample %s' %
                             (self.name, self._sample.name))
                raise NoServers('outputMode %s but httpeventServers not specified for sample %s' %
                                (self.name, self._sample.name))
        # set default output mode to round robin
        if hasattr(self.config, 'httpeventOutputMode') and self.config.httpeventOutputMode:
            self.httpeventoutputmode = config.httpeventOutputMode
        elif hasattr(self._sample, 'httpeventOutputMode') and self._sample.httpeventOutputMode:
            self.httpeventoutputmode = self._sample.httpeventOutputMode
        else:
            self.httpeventoutputmode = 'roundrobin'
        if hasattr(self.config, 'httpeventMaxPayloadSize') and self.config.httpeventMaxPayloadSize:
            self.httpeventmaxsize = self.config.httpeventMaxPayloadSize
        elif hasattr(self._sample, 'httpeventMaxPayloadSize') and self._sample.httpeventMaxPayloadSize:
            self.httpeventmaxsize = self._sample.httpeventMaxPayloadSize
        else:
            self.httpeventmaxsize = 10000
        logger.debug("Currentmax size: %s " % self.httpeventmaxsize)
        # Servers may be supplied as a JSON string or an already-parsed object
        if isinstance(config.httpeventServers, str):
            self.httpeventServers = json.loads(config.httpeventServers)
        else:
            self.httpeventServers = config.httpeventServers
        logger.debug("Setting up the connection pool for %s in %s" % (self._sample.name, self._app))
        self.createConnections()
        logger.debug("Pool created.")
        logger.debug("Finished init of %s plugin." % self.name)
    except NoServers:
        # BUGFIX: this was previously swallowed by the generic handler below,
        # so a misconfigured sample carried on with no servers at all.
        raise
    except Exception as e:
        logger.exception(str(e))
def _sendHTTPEvents(self, payload):
    """Send a list of events to Splunk HEC in chunks capped at httpeventmaxsize bytes.

    Each event is JSON-serialized and appended to the current chunk; when
    the next event would overflow the cap, the chunk is transmitted via
    self._transmitEvents and a new chunk is started. A trailing partial
    chunk is flushed after the loop.

    :param payload: list of event dicts to transmit
    :raises Exception: re-raises anything thrown by _transmitEvents
    """
    currentreadsize = 0
    stringpayload = ""
    totalbytesexpected = 0
    totalbytessent = 0
    numberevents = len(payload)
    logger.debug("Sending %s events to splunk" % numberevents)
    for line in payload:
        logger.debug("line: %s " % line)
        targetline = json.dumps(line)
        logger.debug("targetline: %s " % targetline)
        targetlinesize = len(targetline)
        totalbytesexpected += targetlinesize
        if (int(currentreadsize) + int(targetlinesize)) <= int(self.httpeventmaxsize):
            # Still room in the current chunk: keep accumulating
            stringpayload = stringpayload + targetline
            currentreadsize = currentreadsize + targetlinesize
            logger.debug("stringpayload: %s " % stringpayload)
        else:
            logger.debug("Max size for payload hit, sending to splunk then continuing.")
            try:
                # BUGFIX: guard against transmitting an empty chunk, which
                # happened when a single event exceeded httpeventmaxsize.
                if stringpayload:
                    self._transmitEvents(stringpayload)
                    totalbytessent += len(stringpayload)
                currentreadsize = targetlinesize
                stringpayload = targetline
            except Exception as e:
                logger.exception(str(e))
                raise
    # Flush whatever is left after the loop (skip when there is nothing to send;
    # previously an empty input produced an empty transmit).
    if stringpayload:
        try:
            totalbytessent += len(stringpayload)
            logger.debug(
                "End of for loop hit for sending events to splunk, total bytes sent: %s ---- out of %s -----" %
                (totalbytessent, totalbytesexpected))
            self._transmitEvents(stringpayload)
        except Exception as e:
            logger.exception(str(e))
            raise
def send_events(self, send_objects, startTime):
    """Push prepared events to the output queue and flush the interval.

    :param send_objects: events ready for output (tokens already replaced)
    :param startTime: datetime when this generation interval began, used to log elapsed time
    :raises Exception: re-raises anything thrown by the output queue or flush
    """
    try:
        self._out.bulksend(send_objects)
        self._sample.timestamp = None
    except Exception as e:
        logger.exception("Exception {} happened.".format(type(e)))
        # Bare raise preserves the original traceback instead of re-raising from here
        raise
    try:
        # TODO: Change this logic so that we don't lose all events if an exception is hit (try/except/break?)
        endTime = datetime.datetime.now()
        timeDiff = endTime - startTime
        timeDiffFrac = "%d.%06d" % (timeDiff.seconds, timeDiff.microseconds)
        logger.debug("Interval complete, flushing feed")
        self._out.flush(endOfInterval=True)
        logger.debug("Generation of sample '%s' in app '%s' completed in %s seconds." %
                     (self._sample.name, self._sample.app, timeDiffFrac))
    except Exception as e:
        logger.exception("Exception {} happened.".format(type(e)))
        raise
def gen(self, count, earliest, latest, samplename=None):
    """Replay the loaded sample, pacing events by the timestamp gaps seen in the file.

    Each event is re-stamped relative to the computed backfill start time;
    the gap between consecutive source events (scaled by timeMultiple)
    advances that clock, and during live backfill the gap is also slept out
    in real time.
    """
    # 9/8/15 CS Check to make sure we have events to replay
    self._sample.loadSample()
    previous_event = None
    previous_event_timestamp = None
    self.current_time = self._sample.now()
    # If backfill exists, calculate the start of the backfill time relative to the current time.
    # Otherwise, backfill time equals to the current time
    self.backfill_time = self._sample.get_backfill_time(self.current_time)
    if not self._sample.backfill or self._sample.backfilldone:
        # No (remaining) backfill: start from a random timestamp inside the window
        self.backfill_time = EventgenTimestamp.get_random_timestamp_backfill(
            earliest, latest, self._sample.earliest, self._sample.latest)
    for line in self._sample.get_loaded_sample():
        # Add newline to a raw line if necessary
        try:
            # Dict-shaped sample lines may carry per-event metadata overrides
            if line['_raw'][-1] != '\n':
                line['_raw'] += '\n'
            index = line.get('index', self._sample.index)
            host = line.get('host', self._sample.host)
            hostRegex = line.get('hostRegex', self._sample.hostRegex)
            source = line.get('source', self._sample.source)
            sourcetype = line.get('sourcetype', self._sample.sourcetype)
            rpevent = {
                '_raw': line['_raw'],
                'index': index,
                'host': host,
                'hostRegex': hostRegex,
                'source': source,
                'sourcetype': sourcetype
            }
        except:
            # Plain-string sample line: fall back to sample-level metadata
            if line[-1] != '\n':
                line += '\n'
            rpevent = {
                '_raw': line,
                'index': self._sample.index,
                'host': self._sample.host,
                'hostRegex': self._sample.hostRegex,
                'source': self._sample.source,
                'sourcetype': self._sample.sourcetype
            }
        # If timestamp doesn't exist, the sample file should be fixed to include timestamp for every event.
        try:
            current_event_timestamp = self._sample.getTSFromEvent(rpevent[self._sample.timeField])
        except Exception:
            try:
                # Retry against the original line in case the configured
                # timeField only exists there
                current_event_timestamp = self._sample.getTSFromEvent(line[self._sample.timeField])
            except Exception:
                try:
                    logger.error(
                        "Sample timeField {} failed to locate. Trying to locate _time field.".format(
                            self._sample.timeField))
                    current_event_timestamp = self._sample.getTSFromEvent(line["_time"])
                except Exception:
                    # No usable timestamp at all: skip this event entirely
                    logger.exception("Extracting timestamp from an event failed.")
                    continue
        # Always flush the first event
        if previous_event is None:
            previous_event = rpevent
            previous_event_timestamp = current_event_timestamp
            self.set_time_and_send(rpevent, self.backfill_time, earliest, latest)
            continue
        # Refer to the last event to calculate the new backfill time
        time_difference = datetime.timedelta(
            seconds=(current_event_timestamp - previous_event_timestamp).total_seconds() *
            self._sample.timeMultiple)
        if self.backfill_time + time_difference >= self.current_time:
            # The replay clock has caught up with "now": the remainder of the
            # gap is slept out in real time (only while backfill is active)
            sleep_time = time_difference - (self.current_time - self.backfill_time)
            if self._sample.backfill and not self._sample.backfilldone:
                time.sleep(sleep_time.seconds)
            self.current_time += sleep_time
            self.backfill_time = self.current_time
        else:
            self.backfill_time += time_difference
        previous_event = rpevent
        previous_event_timestamp = current_event_timestamp
        self.set_time_and_send(rpevent, self.backfill_time, earliest, latest)
    self._out.flush(endOfInterval=True)
    return
def load_sample_file(self):
    """Parse the loaded sample into replay events and compute per-event time deltas.

    :returns: a list of event dicts, each carrying '_raw' plus metadata
        (index/host/hostRegex/source/sourcetype), 'base_time' (the timestamp
        extracted from the event) and 'timediff' (the delta from the previous
        event scaled by timeMultiple; zero for the first event). Events whose
        timestamp cannot be extracted are skipped.
    """
    line_list = []
    for line in self._sample.get_loaded_sample():
        # Add newline to a raw line if necessary
        try:
            # Dict-shaped sample lines may carry per-event metadata overrides
            if line["_raw"][-1] != "\n":
                line["_raw"] += "\n"
            rpevent = {
                "_raw": line["_raw"],
                "index": line.get("index", self._sample.index),
                "host": line.get("host", self._sample.host),
                "hostRegex": line.get("hostRegex", self._sample.hostRegex),
                "source": line.get("source", self._sample.source),
                "sourcetype": line.get("sourcetype", self._sample.sourcetype),
            }
        except Exception:
            # Plain-string sample line: fall back to sample-level metadata.
            # (Narrowed from a bare except so KeyboardInterrupt/SystemExit propagate.)
            if line[-1] != "\n":
                line += "\n"
            rpevent = {
                "_raw": line,
                "index": self._sample.index,
                "host": self._sample.host,
                "hostRegex": self._sample.hostRegex,
                "source": self._sample.source,
                "sourcetype": self._sample.sourcetype,
            }
        # Extract the timestamp: configured timeField on the reformatted event,
        # then on the original line, finally the raw "_time" field.
        try:
            rpevent["base_time"] = self._sample.getTSFromEvent(rpevent[self._sample.timeField])
        except Exception:
            try:
                rpevent["base_time"] = self._sample.getTSFromEvent(line[self._sample.timeField])
            except Exception:
                try:
                    logger.error(
                        "Sample timeField {} failed to locate. Trying to locate _time field.".format(
                            self._sample.timeField))
                    # BUGFIX: store base_time here too; previously this branch
                    # extracted the timestamp but never attached it, which made
                    # the delta pass below raise KeyError on 'base_time'.
                    rpevent["base_time"] = self._sample.getTSFromEvent(line["_time"])
                except Exception:
                    logger.exception("Extracting timestamp from an event failed.")
                    continue
        line_list.append(rpevent)
    # Single pass to attach the time delta between consecutive events
    previous_event = None
    for current_event in line_list:
        if previous_event is None:
            # First event: delta against itself, i.e. zero
            previous_event = current_event
        current_event["timediff"] = (
            current_event["base_time"] - previous_event["base_time"]) * self._sample.timeMultiple
        previous_event = current_event
    return line_list
def parse_args():
    """Parse command line arguments.

    :returns: the parsed argparse.Namespace. May exit the process after
        printing help/version text, or when no arguments / an unknown help
        topic are given.
    :raises Exception: when the service subcommand is run without a role
        (defensive; argparse already enforces required=True).
    """
    subparser_dict = {}
    parser = argparse.ArgumentParser(prog="Eventgen", description="Splunk Event Generation Tool")
    parser.add_argument("-v", "--verbosity", action="count", help="increase output verbosity")
    # action="version" prints the version and exits during parse_args(), so no
    # post-parse handling is needed. (The old `if args.version:
    # args.print_version()` block was unreachable and would have raised
    # AttributeError, since Namespace has no print_version method.)
    parser.add_argument(
        "--version",
        action="version",
        default=False,
        version="%(prog)s " + EVENTGEN_VERSION,
    )
    parser.add_argument("--modinput-mode", default=False)
    parser.add_argument("--counter-output", action="store_true", default=False)
    subparsers = parser.add_subparsers(title="commands", help="valid subcommands", dest="subcommand")

    # Generate subparser
    generate_subparser = subparsers.add_parser("generate", help="Generate events using a supplied config file")
    subparser_dict["generate"] = generate_subparser
    generate_subparser.add_argument(
        "configfile",
        help="Location of eventgen.conf, app folder, or name of an app in $SPLUNK_HOME/etc/apps to run",
    )
    generate_subparser.add_argument("-s", "--sample",
                                    help="Run specified sample only, disabling all other samples")
    generate_subparser.add_argument("--keepoutput", action="store_true",
                                    help="Keep original outputMode for the sample")
    generate_subparser.add_argument("--devnull", action="store_true", help="Set outputMode to devnull")
    generate_subparser.add_argument("--modinput", action="store_true",
                                    help="Set outputMode to modinput, to see metadata")
    generate_subparser.add_argument("-c", "--count", type=int, help="Set sample count")
    generate_subparser.add_argument("-i", "--interval", type=int, help="Set sample interval")
    generate_subparser.add_argument("-b", "--backfill", help="Set time to backfill from")
    generate_subparser.add_argument(
        "-e", "--end",
        help="Set time to end generation at or a number of intervals to run",
    )
    generate_subparser.add_argument("--generators", type=int, help="Number of GeneratorWorkers (mappers)")
    generate_subparser.add_argument("--outputters", type=int, help="Number of OutputWorkers (reducers)")
    generate_subparser.add_argument("--disableOutputQueue", action="store_true", help="Disable reducer step")
    generate_subparser.add_argument("--multiprocess", action="store_true",
                                    help="Use multiprocesing instead of threading")
    generate_subparser.add_argument("--profiler", action="store_true", help="Turn on cProfiler")
    generate_subparser.add_argument(
        "--generator-queue-size", type=int, default=500,
        help="the max queue size for the "
        "generator queue, timer object puts all the generator tasks into this queue, default max size is 500",
    )
    generate_subparser.add_argument("--disable-logging", action="store_true", help="disable logging")

    # Build subparser
    build_subparser = subparsers.add_parser("build", help="Will build different forms of sa-eventgen")
    subparser_dict["build"] = build_subparser
    build_subparser.add_argument(
        "--mode", type=str, default="splunk-app",
        help="Specify what type of package to build, defaults to splunk-app mode.",
    )
    build_subparser.add_argument("--destination",
                                 help="Specify where to store the output of the build command.")
    build_subparser.add_argument(
        "--remove", default=True,
        help="Remove the build directory after completion. Defaults to True",
    )

    # Service subparser
    service_subparser = subparsers.add_parser(
        "service",
        help=("Run Eventgen as an api server. Parameters for starting this service can be defined as either env"
              "variables or CLI arguments, where env variables takes precedence. See help for more info."),
    )
    # BUGFIX: register service in subparser_dict so "eventgen help service" works
    subparser_dict["service"] = service_subparser
    service_subparser.add_argument(
        "--role", "-r", type=str, default=None, required=True,
        choices=["controller", "server", "standalone"],
        help="Define the role for this Eventgen node. Options: controller, server, standalone",
    )
    service_subparser.add_argument("--redis-host", type=str, default="127.0.0.1", help="Redis Host")
    service_subparser.add_argument("--redis-port", type=str, default="6379", help="Redis Port")
    service_subparser.add_argument("--web-server-port", type=str, default="9500",
                                   help="Port you want to run a web server on")
    service_subparser.add_argument("--multithread", action="store_true",
                                   help="Use multi-thread instead of multi-process")

    # Help subparser
    # NOTE: Keep this at the end so the help string below can list every other
    # registered subcommand. (BUGFIX: subparser_dict was previously populated
    # AFTER this string was built, so it only ever listed "help".)
    help_subparser = subparsers.add_parser("help", help="Display usage on a subcommand")
    helpstr = "Help on a specific command, valid commands are: " + ", ".join(
        list(subparser_dict.keys()) + ["help"])
    help_subparser.add_argument("command", nargs="?", default="default", help=helpstr)
    subparser_dict["help"] = help_subparser

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(2)
    args = parser.parse_args()
    if "subcommand" not in args:
        parser.print_help()
        sys.exit(2)
    if args.subcommand == "service" and not args.role:
        msg = "Role is undefined. Please specify a role for this Eventgen service using --role/-r."
        logger.exception(msg)
        raise Exception(msg)
    if args.subcommand == "help" and args.command == "default":
        parser.print_help()
        sys.exit(0)
    if args.subcommand == "help":
        if args.command in list(subparser_dict.keys()):
            subparser_dict[args.command].print_help()
        else:
            parser.print_help()
        sys.exit(0)
    elif args.subcommand == "build" and not args.destination:
        print("No destination passed for storing output file, attempting to use the current working dir.")

    # Allow passing of a Splunk app on the command line and expand the full path before passing up the chain
    if hasattr(args, "configfile") and not os.path.exists(args.configfile):
        if "SPLUNK_HOME" in os.environ:
            if os.path.isdir(os.path.join(os.environ["SPLUNK_HOME"], "etc", "apps", args.configfile)):
                args.configfile = os.path.join(os.environ["SPLUNK_HOME"], "etc", "apps", args.configfile)
            else:
                args.configfile = None
    return args
def real_run(self):
    """
    Worker function of the Timer class.
    Determine whether a plugin is queueable, and either place an item in the generator queue for that plugin or
    call the plugin's gen method directly.
    """
    if self.sample.delay > 0:
        logger.info("Sample set to delay %s, sleeping." % self.sample.delay)
        time.sleep(self.sample.delay)
    logger.debug("Timer creating plugin for '%s'" % self.sample.name)
    end = False
    raw_event_size = self.predict_event_size()
    if self.end:
        if int(self.end) == 0:
            logger.info("End = 0, no events will be generated for sample '%s'" % self.sample.name)
            end = True
        elif int(self.end) == -1:
            logger.info("End is set to -1. Will be running without stopping for sample %s" % self.sample.name)
    while not end:
        try:
            # Need to be able to stop threads by the main thread or this thread. self.config will stop all threads
            # referenced in the config object, while, self.stopping will only stop this one.
            if self.config.stopping or self.stopping:
                end = True
            self.rater.update_options(
                config=self.config,
                sample=self.sample,
                generatorQueue=self.generatorQueue,
                outputQueue=self.outputQueue,
                outputPlugin=self.outputPlugin,
                generatorPlugin=self.generatorPlugin,
            )
            count = self.rater.rate()
            # First run of the generator, see if we have any backfill work to do.
            if self.countdown <= 0:
                if self.sample.backfill and not self.sample.backfilldone:
                    # Backfill still pending: delegate to the backfill rater
                    self.backrater.update_options(
                        config=self.config,
                        sample=self.sample,
                        generatorQueue=self.generatorQueue,
                        outputQueue=self.outputQueue,
                        outputPlugin=self.outputPlugin,
                        generatorPlugin=self.generatorPlugin,
                        samplerater=self.rater,
                    )
                    self.backrater.queue_it(count)
                else:
                    if self.sample.generator == "perdayvolumegenerator":
                        # perDayVolume needs the raw event size to apportion volume
                        self.perdayrater.update_options(
                            config=self.config,
                            sample=self.sample,
                            generatorQueue=self.generatorQueue,
                            outputQueue=self.outputQueue,
                            outputPlugin=self.outputPlugin,
                            generatorPlugin=self.generatorPlugin,
                            samplerater=self.rater,
                            raweventsize=raw_event_size,
                        )
                        self.perdayrater.rate()
                        self.perdayrater.queue_it(count)
                    self.rater.queue_it(count)
                self.countdown = self.interval
                self.executions += 1
        except Exception as e:
            logger.exception(str(e))
            if self.stopping:
                end = True
        # Sleep until we're supposed to wake up and generate more events
        if self.countdown == 0:
            self.countdown = self.interval
        # 8/20/15 CS Adding support for ending generation at a certain time
        if self.end:
            if int(self.end) == -1:
                time.sleep(self.time)
                self.countdown -= self.time
                continue
            # 3/16/16 CS Adding support for ending on a number of executions instead of time
            # Should be fine with storing state in this sample object since each sample has it's own unique
            # timer thread
            if not self.endts:
                if self.executions >= int(self.end):
                    logger.info("End executions %d reached, ending generation of sample '%s'" %
                                (int(self.end), self.sample.name))
                    self.stopping = True
                    end = True
            elif datetime.datetime.now() >= self.endts:
                # BUGFIX: re-read the clock on every iteration; the previous
                # code compared a timestamp captured once before the loop, so
                # the configured end time was never detected after startup.
                logger.info("End Time '%s' reached, ending generation of sample '%s'" %
                            (self.sample.endts, self.sample.name))
                self.stopping = True
                end = True
        time.sleep(self.time)
        self.countdown -= self.time
def gen(self, count, earliest, latest, samplename=None):
    """Generate events by rendering the sample's Jinja template.

    Renders the template repeatedly (up to target_count cycles), parses each
    non-empty output line as JSON, fills in missing metadata fields from the
    sample defaults, and bulk-sends the results.

    :returns: 0 on success, 1 when any exception was caught and logged
    :raises: nothing - all exceptions are logged and converted to the return code
    """
    # TODO: Figure out how to gracefully tell generator plugins to exit when there is an error.
    try:
        from jinja2 import Environment, FileSystemLoader
        self.target_count = count
        # assume that if there is no "count" field, we want to run 1 time, and only one time.
        if self.target_count == -1:
            self.target_count = 1
        self.earliest = earliest
        self.latest = latest
        if hasattr(self._sample, "jinja_count_type"):
            if self._sample.jinja_count_type in ["line_count", "cycles", "perDayVolume"]:
                self.jinja_count_type = self._sample.jinja_count_type
        startTime = datetime.datetime.now()
        # if eventgen is running as Splunk app the configfile is None
        sample_dir = self._sample.sampleDir
        if self._sample.splunkEmbedded is True:
            splunk_home = os.environ["SPLUNK_HOME"]
            app_name = getattr(self._sample, 'app', 'SA-Eventgen')
            sample_dir = os.path.join(splunk_home, 'etc', 'apps', app_name, 'samples')
        if not hasattr(self._sample, "jinja_template_dir"):
            template_dir = 'templates'
        else:
            template_dir = self._sample.jinja_template_dir
        if not os.path.isabs(template_dir):
            target_template_dir = os.path.join(sample_dir, template_dir)
        else:
            target_template_dir = template_dir
        logger.info('set jinja template path to %s', target_template_dir)
        if not hasattr(self._sample, "jinja_target_template"):
            raise CantFindTemplate("Template to load not specified in eventgen conf for stanza. Skipping Stanza")
        jinja_env = Environment(
            loader=FileSystemLoader([target_template_dir], encoding='utf-8', followlinks=False),
            extensions=['jinja2.ext.do', 'jinja2.ext.with_', 'jinja2.ext.loopcontrols', JinjaTime],
            line_statement_prefix="#",
            line_comment_prefix="##")
        jinja_loaded_template = jinja_env.get_template(str(self._sample.jinja_target_template))
        if hasattr(self._sample, 'jinja_variables'):
            jinja_loaded_vars = json.loads(self._sample.jinja_variables)
        else:
            # BUGFIX: default to an empty dict; the previous None raised
            # TypeError on the item assignments below whenever a sample had
            # no jinja_variables configured.
            jinja_loaded_vars = {}
        # make the default generator vars accessable to jinja
        jinja_loaded_vars["eventgen_count"] = self.current_count
        jinja_loaded_vars["eventgen_maxcount"] = self.target_count
        jinja_loaded_vars["eventgen_earliest"] = self.earliest
        self.earliest_epoch = (self.earliest - datetime.datetime(1970, 1, 1)).total_seconds()
        jinja_loaded_vars["eventgen_earliest_epoch"] = self.earliest_epoch
        jinja_loaded_vars["eventgen_latest"] = self.latest
        jinja_loaded_vars["eventgen_latest_epoch"] = (
            self.latest - datetime.datetime(1970, 1, 1)).total_seconds()
        self.latest_epoch = (self.latest - datetime.datetime(1970, 1, 1)).total_seconds()
        while self.current_count < self.target_count:
            self.end_of_cycle = False
            jinja_loaded_vars["eventgen_count"] = self.current_count
            jinja_loaded_vars["eventgen_target_time_earliest"], jinja_loaded_vars["eventgen_target_time_latest"], \
                jinja_loaded_vars["eventgen_target_time_slice_size"], \
                jinja_loaded_vars["eventgen_target_time_epoch"] = \
                JinjaTime._get_time_slice(self.earliest_epoch, self.latest_epoch, self.target_count,
                                          self.current_count, slice_type="random")
            self.jinja_stream = jinja_loaded_template.stream(jinja_loaded_vars)
            lines_out = []
            try:
                for raw_line in self.jinja_stream:
                    # trim the newline char for jinja output
                    # it is quite normal to output empty newlines in jinja
                    line = raw_line.strip()
                    if line:
                        # TODO: Time can be supported by self._sample.timestamp, should probably set that up here.
                        try:
                            target_line = json.loads(line)
                        except ValueError as e:
                            logger.error("Unable to parse Jinja's return. Line: {0}".format(line))
                            # BUGFIX: Exception.message does not exist on Python 3; use str(e)
                            logger.error("Parse Failure Reason: {0}".format(str(e)))
                            logger.error(
                                "Please note, you must meet the requirements for json.loads in python if you have "
                                "not installed ujson. Native python does not support multi-line events.")
                            continue
                        current_line_keys = list(target_line.keys())
                        if "_time" not in current_line_keys:
                            # TODO: Add a custom exception here
                            raise Exception("No _time field supplied, please add time to your jinja template.")
                        if "_raw" not in current_line_keys:
                            # TODO: Add a custom exception here
                            # BUGFIX: message previously said "add time" for a missing _raw field
                            raise Exception("No _raw field supplied, please add _raw to your jinja template.")
                        if "host" not in current_line_keys:
                            target_line["host"] = self._sample.host
                        if "hostRegex" not in current_line_keys:
                            target_line["hostRegex"] = self._sample.hostRegex
                        if "source" not in current_line_keys:
                            target_line["source"] = self._sample.source
                        if "sourcetype" not in current_line_keys:
                            target_line["sourcetype"] = self._sample.sourcetype
                        if "index" not in current_line_keys:
                            target_line["index"] = self._sample.index
                        lines_out.append(target_line)
            except TypeError as e:
                logger.exception(str(e))
            self.end_of_cycle = True
            self._increment_count(lines_out)
            self._out.bulksend(lines_out)
        endTime = datetime.datetime.now()
        timeDiff = endTime - startTime
        timeDiffFrac = "%d.%06d" % (timeDiff.seconds, timeDiff.microseconds)
        logger.debug("Interval complete, flushing feed")
        self._out.flush(endOfInterval=True)
        logger.info("Generation of sample '%s' completed in %s seconds." %
                    (self._sample.name, timeDiffFrac))
        return 0
    except Exception as e:
        logger.exception(str(e))
        return 1
def real_run(self):
    """
    Worker function of the Timer class.
    Determine whether a plugin is queueable, and either place an item in the generator queue for that plugin or
    call the plugin's gen method directly.
    """
    if self.sample.delay > 0:
        logger.info("Sample set to delay %s, sleeping." % self.sample.delay)
        time.sleep(self.sample.delay)
    logger.debug("Timer creating plugin for '%s'" % self.sample.name)
    end = False
    previous_count_left = 0
    raw_event_size = self.predict_event_size()
    if self.end:
        if int(self.end) == 0:
            logger.info("End = 0, no events will be generated for sample '%s'" % self.sample.name)
            end = True
        elif int(self.end) == -1:
            logger.info("End is set to -1. Will be running without stopping for sample %s" % self.sample.name)
    while not end:
        # Need to be able to stop threads by the main thread or this thread. self.config will stop all threads
        # referenced in the config object, while, self.stopping will only stop this one.
        if self.config.stopping or self.stopping:
            end = True
            continue
        count = self.rater.rate()
        # First run of the generator, see if we have any backfill work to do.
        if self.countdown <= 0:
            if self.sample.backfill and not self.sample.backfilldone:
                realtime = self.sample.now(realnow=True)
                # Parse the backfill spec (e.g. "-60m") into sign, number and unit
                if "-" in self.sample.backfill[0]:
                    mathsymbol = "-"
                else:
                    mathsymbol = "+"
                backfillnumber = ""
                backfillletter = ""
                for char in self.sample.backfill:
                    if char.isdigit():
                        backfillnumber += char
                    elif char != "-":
                        backfillletter += char
                backfillearliest = timeParserTimeMath(plusminus=mathsymbol, num=backfillnumber,
                                                     unit=backfillletter, ret=realtime)
                # Walk forward in interval-sized windows until we catch up to "now",
                # dispatching one generator task per window
                while backfillearliest < realtime:
                    if self.end and self.executions == int(self.end):
                        logger.info("End executions %d reached, ending generation of sample '%s'" %
                                    (int(self.end), self.sample.name))
                        break
                    et = backfillearliest
                    lt = timeParserTimeMath(plusminus="+", num=self.interval, unit="s", ret=et)
                    # Shallow-copy the sample but deep-copy its tokens so each
                    # generator task mutates its own token state
                    copy_sample = copy.copy(self.sample)
                    tokens = copy.deepcopy(self.sample.tokens)
                    copy_sample.tokens = tokens
                    genPlugin = self.generatorPlugin(sample=copy_sample)
                    # need to make sure we set the queue right if we're using multiprocessing or thread modes
                    genPlugin.updateConfig(config=self.config, outqueue=self.outputQueue)
                    genPlugin.updateCounts(count=count, start_time=et, end_time=lt)
                    try:
                        self.generatorQueue.put(genPlugin, True, 3)
                        self.executions += 1
                        backfillearliest = lt
                    except Full:
                        # Queue full: keep backfillearliest at et so this window is retried
                        logger.warning(
                            "Generator Queue Full. Reput the backfill generator task later. %d backfill generators are dispatched.",
                            self.executions)
                        backfillearliest = et
                    realtime = self.sample.now(realnow=True)
                self.sample.backfilldone = True
            else:
                # 12/15/13 CS Moving the rating to a separate plugin architecture
                # Save previous interval count left to avoid perdayvolumegenerator drop small tasks
                if self.sample.generator == 'perdayvolumegenerator':
                    count = self.rater.rate() + previous_count_left
                    if 0 < count < raw_event_size:
                        # Interval volume smaller than one event: carry it over
                        # to the next interval instead of dropping it
                        logger.info(
                            "current interval size is {}, which is smaller than a raw event size {}.".format(
                                count, raw_event_size) + "Wait for the next turn.")
                        previous_count_left = count
                        self.countdown = self.interval
                        self.executions += 1
                        continue
                    else:
                        previous_count_left = 0
                else:
                    count = self.rater.rate()
                et = self.sample.earliestTime()
                lt = self.sample.latestTime()
                try:
                    if count < 1 and count != -1:
                        logger.info(
                            "There is no data to be generated in worker {0} because the count is {1}.".format(
                                self.sample.config.generatorWorkers, count))
                    else:
                        # Spawn workers at the beginning of job rather than wait for next interval
                        logger.info("Starting '%d' generatorWorkers for sample '%s'" %
                                    (self.sample.config.generatorWorkers, self.sample.name))
                        for worker_id in range(self.config.generatorWorkers):
                            # Shallow-copy the sample but deep-copy its tokens so each
                            # worker gets independent token state
                            copy_sample = copy.copy(self.sample)
                            tokens = copy.deepcopy(self.sample.tokens)
                            copy_sample.tokens = tokens
                            genPlugin = self.generatorPlugin(sample=copy_sample)
                            # Adjust queue for threading mode
                            genPlugin.updateConfig(config=self.config, outqueue=self.outputQueue)
                            genPlugin.updateCounts(count=count, start_time=et, end_time=lt)
                            try:
                                self.generatorQueue.put(genPlugin)
                                logger.debug(
                                    ("Worker# {0}: Put {1} MB of events in queue for sample '{2}'" +
                                     "with et '{3}' and lt '{4}'").format(
                                         worker_id, round((count / 1024.0 / 1024), 4), self.sample.name, et, lt))
                            except Full:
                                logger.warning("Generator Queue Full. Skipping current generation.")
                    self.executions += 1
                except Exception as e:
                    logger.exception(str(e))
                    if self.stopping:
                        end = True
                    pass
            # Sleep until we're supposed to wake up and generate more events
            self.countdown = self.interval
            # 8/20/15 CS Adding support for ending generation at a certain time
            if self.end:
                if int(self.end) == -1:
                    time.sleep(self.time)
                    self.countdown -= self.time
                    continue
                # 3/16/16 CS Adding support for ending on a number of executions instead of time
                # Should be fine with storing state in this sample object since each sample has it's own unique
                # timer thread
                if not self.endts:
                    if self.executions >= int(self.end):
                        logger.info("End executions %d reached, ending generation of sample '%s'" %
                                    (int(self.end), self.sample.name))
                        self.stopping = True
                        end = True
                # NOTE(review): 'lt' is only assigned in the non-backfill branch above
                # (or inside the backfill while-loop); if the backfill branch ran without
                # dispatching any window, 'lt' may be unbound here — confirm intended.
                elif lt >= self.endts:
                    logger.info("End Time '%s' reached, ending generation of sample '%s'" %
                                (self.sample.endts, self.sample.name))
                    self.stopping = True
                    end = True
        else:
            time.sleep(self.time)
            self.countdown -= self.time