Example #1
0
 def build_events(self,
                  eventsDict,
                  startTime,
                  earliest,
                  latest,
                  ignore_tokens=False):
     """Ready events for output by replacing tokens and updating the output queue.

     :param eventsDict: raw events to process; tokens are replaced before
         sending so that perDayVolume evaluates the correct event length
     :param startTime: datetime marking when generation of this interval began
     :param earliest: earliest timestamp bound passed to replace_tokens
     :param latest: latest timestamp bound passed to replace_tokens
     :param ignore_tokens: when True, skip token replacement
     :raises Exception: re-raises any failure from bulksend or flush after logging
     """
     # Replace tokens first so that perDayVolume evaluates the correct event length
     send_objects = self.replace_tokens(eventsDict,
                                        earliest,
                                        latest,
                                        ignore_tokens=ignore_tokens)
     try:
         self._out.bulksend(send_objects)
         self._sample.timestamp = None
     except Exception as e:
         logger.exception("Exception {} happened.".format(type(e)))
         # Bare raise preserves the original traceback ("raise e" re-wraps it)
         raise
     try:
         # TODO: Change this logic so that we don't lose all events if an exception is hit (try/except/break?)
         endTime = datetime.datetime.now()
         timeDiff = endTime - startTime
         # Fold timedelta.days into the seconds field so intervals longer than
         # 24h report the true duration (timedelta.seconds alone wraps at 1 day)
         timeDiffFrac = "%d.%06d" % (timeDiff.days * 86400 + timeDiff.seconds,
                                     timeDiff.microseconds)
         logger.debug("Interval complete, flushing feed")
         self._out.flush(endOfInterval=True)
         logger.debug(
             "Generation of sample '%s' in app '%s' completed in %s seconds."
             % (self._sample.name, self._sample.app, timeDiffFrac))
     except Exception as e:
         logger.exception("Exception {} happened.".format(type(e)))
         raise
Example #2
0
    def _sendHTTPEvents(self, events):
        """Reshape eventgen events into the SCS request format and POST them
        in batches no larger than self.scsHttpPayloadMax bytes.

        Each batch is posted asynchronously via self.session.post and the
        pending response is appended to self.active_sessions.

        :param events: iterable of event dicts (eventgen shape: _raw, _time,
            index, optional hostRegex); mutated in place to SCS shape
        :raises Exception: re-raises any failure after logging it
        """
        currentPayloadSize = 0
        currentPayload = []
        try:
            for event in events:
                # Reformat the event to fit the scs request spec
                # TODO: Move this logic to generator
                try:
                    event["body"] = event.pop("_raw")
                    event["timestamp"] = int(event.pop("_time") * 1000)
                    event.pop("index")
                    if "attributes" not in event:
                        event["attributes"] = {}
                        # hostRegex is optional; only move it when present
                        if "hostRegex" in event:
                            event["attributes"]["hostRegex"] = event.pop(
                                "hostRegex")
                except (KeyError, TypeError):
                    # Event is already in SCS shape or lacks optional fields.
                    # Narrowed from a bare except so real errors still propagate.
                    pass

                targetline = json.dumps(event)
                targetlinesize = len(targetline)

                # Continue building a current payload if the payload is less than the max size
                if (currentPayloadSize +
                        targetlinesize) < self.scsHttpPayloadMax:
                    currentPayload.append(event)
                    currentPayloadSize += targetlinesize
                else:
                    self.active_sessions.append(
                        self.session.post(
                            url=self.scsEndPoint,
                            data=json.dumps(currentPayload),
                            headers=self.header,
                            verify=False,
                        ))
                    # Start the next batch with the event that overflowed
                    currentPayloadSize = targetlinesize
                    currentPayload = [event]

            # Final flush of the leftover events
            if currentPayloadSize > 0:
                self.active_sessions.append(
                    self.session.post(
                        url=self.scsEndPoint,
                        data=json.dumps(currentPayload),
                        headers=self.header,
                        verify=False,
                    ))

        except Exception as e:
            logger.exception(str(e))
            # Bare raise preserves the original traceback
            raise
Example #3
0
 def updateConfig(self, config):
     """Configure the httpevent output plugin from global config, falling back
     to per-sample settings, then build the connection pool.

     Resolution order for each setting: self.config -> self._sample -> default
     (output mode 'roundrobin', max payload size 10000).

     :param config: global eventgen config object
     :raises NoServers: when no httpeventServers are configured anywhere
     """
     OutputPlugin.updateConfig(self, config)
     try:
         if not hasattr(self.config, 'httpeventServers'):
             if hasattr(self._sample, 'httpeventServers'):
                 self.config.httpeventServers = self._sample.httpeventServers
             else:
                 msg = ('outputMode %s but httpeventServers not specified for sample %s'
                        % (self.name, self._sample.name))
                 logger.error(msg)
                 raise NoServers(msg)
         # set default output mode to round robin
         if getattr(self.config, 'httpeventOutputMode', None):
             self.httpeventoutputmode = config.httpeventOutputMode
         elif getattr(self._sample, 'httpeventOutputMode', None):
             self.httpeventoutputmode = self._sample.httpeventOutputMode
         else:
             self.httpeventoutputmode = 'roundrobin'
         if getattr(self.config, 'httpeventMaxPayloadSize', None):
             self.httpeventmaxsize = self.config.httpeventMaxPayloadSize
         elif getattr(self._sample, 'httpeventMaxPayloadSize', None):
             self.httpeventmaxsize = self._sample.httpeventMaxPayloadSize
         else:
             self.httpeventmaxsize = 10000
         logger.debug("Currentmax size: %s " % self.httpeventmaxsize)
         # httpeventServers may arrive as a JSON string or a parsed structure
         if isinstance(config.httpeventServers, str):
             self.httpeventServers = json.loads(config.httpeventServers)
         else:
             self.httpeventServers = config.httpeventServers
         logger.debug("Setting up the connection pool for %s in %s" %
                      (self._sample.name, self._app))
         self.createConnections()
         logger.debug("Pool created.")
         logger.debug("Finished init of %s plugin." % self.name)
     except NoServers:
         # Misconfiguration must propagate; previously the generic handler
         # below swallowed it, making the raise above a silent no-op.
         raise
     except Exception as e:
         logger.exception(str(e))
Example #4
0
 def _sendHTTPEvents(self, payload):
     """Serialize events to JSON and transmit them to splunk in chunks of at
     most self.httpeventmaxsize bytes via self._transmitEvents.

     :param payload: list of event dicts to send
     :raises Exception: re-raises any transmit failure after logging it
     """
     currentreadsize = 0
     stringpayload = ""
     totalbytesexpected = 0
     totalbytessent = 0
     numberevents = len(payload)
     logger.debug("Sending %s events to splunk" % numberevents)
     for line in payload:
         logger.debug("line: %s " % line)
         targetline = json.dumps(line)
         logger.debug("targetline: %s " % targetline)
         targetlinesize = len(targetline)
         totalbytesexpected += targetlinesize
         # httpeventmaxsize may be configured as a string, hence the int()
         if (currentreadsize + targetlinesize) <= int(self.httpeventmaxsize):
             stringpayload += targetline
             currentreadsize += targetlinesize
             logger.debug("stringpayload: %s " % stringpayload)
         else:
             logger.debug(
                 "Max size for payload hit, sending to splunk then continuing."
             )
             try:
                 self._transmitEvents(stringpayload)
                 totalbytessent += len(stringpayload)
                 # Start the next chunk with the event that overflowed
                 currentreadsize = targetlinesize
                 stringpayload = targetline
             except Exception as e:
                 logger.exception(str(e))
                 raise
     # Flush whatever is left over after the loop. The original used a
     # for/else, which (with no break in the loop) always ran and could
     # transmit an empty payload when no events were supplied.
     if stringpayload:
         try:
             totalbytessent += len(stringpayload)
             logger.debug(
                 "End of for loop hit for sending events to splunk, total bytes sent: %s ---- out of %s -----"
                 % (totalbytessent, totalbytesexpected))
             self._transmitEvents(stringpayload)
         except Exception as e:
             logger.exception(str(e))
             raise
Example #5
0
 def send_events(self, send_objects, startTime):
     """Send pre-built events to the output queue, then flush the interval.

     :param send_objects: events ready for output (tokens already replaced)
     :param startTime: datetime marking when generation of this interval began
     :raises Exception: re-raises any failure from bulksend or flush after logging
     """
     try:
         self._out.bulksend(send_objects)
         self._sample.timestamp = None
     except Exception as e:
         logger.exception("Exception {} happened.".format(type(e)))
         # Bare raise preserves the original traceback ("raise e" re-wraps it)
         raise
     try:
         # TODO: Change this logic so that we don't lose all events if an exception is hit (try/except/break?)
         endTime = datetime.datetime.now()
         timeDiff = endTime - startTime
         # Fold timedelta.days into the seconds field so intervals longer than
         # 24h report the true duration (timedelta.seconds alone wraps at 1 day)
         timeDiffFrac = "%d.%06d" % (timeDiff.days * 86400 + timeDiff.seconds,
                                     timeDiff.microseconds)
         logger.debug("Interval complete, flushing feed")
         self._out.flush(endOfInterval=True)
         logger.debug(
             "Generation of sample '%s' in app '%s' completed in %s seconds."
             % (self._sample.name, self._sample.app, timeDiffFrac))
     except Exception as e:
         logger.exception("Exception {} happened.".format(type(e)))
         raise
Example #6
0
    def gen(self, count, earliest, latest, samplename=None):
        """Replay the loaded sample, pacing output by the timestamp gaps
        observed between consecutive events (scaled by timeMultiple).

        count and samplename are accepted for generator-interface
        compatibility but are not used here. earliest/latest bound the
        randomized backfill start time and are forwarded to
        set_time_and_send. Events whose timestamp cannot be extracted are
        skipped.
        """
        # 9/8/15 CS Check to make sure we have events to replay
        self._sample.loadSample()
        previous_event = None
        previous_event_timestamp = None
        self.current_time = self._sample.now()

        # If backfill exists, calculate the start of the backfill time relative to the current time.
        # Otherwise, backfill time equals to the current time
        self.backfill_time = self._sample.get_backfill_time(self.current_time)

        # No backfill (or backfill already finished): pick a random start
        # timestamp within the earliest/latest window instead.
        if not self._sample.backfill or self._sample.backfilldone:
            self.backfill_time = EventgenTimestamp.get_random_timestamp_backfill(
                earliest, latest, self._sample.earliest, self._sample.latest)

        for line in self._sample.get_loaded_sample():
            # Add newline to a raw line if necessary
            # Samples may yield dicts (with _raw/index/host/... keys) or plain
            # strings; the except branch handles the plain-string case by
            # falling back to the sample-level defaults.
            try:
                if line['_raw'][-1] != '\n':
                    line['_raw'] += '\n'

                index = line.get('index', self._sample.index)
                host = line.get('host', self._sample.host)
                hostRegex = line.get('hostRegex', self._sample.hostRegex)
                source = line.get('source', self._sample.source)
                sourcetype = line.get('sourcetype', self._sample.sourcetype)
                rpevent = {
                    '_raw': line['_raw'],
                    'index': index,
                    'host': host,
                    'hostRegex': hostRegex,
                    'source': source,
                    'sourcetype': sourcetype
                }
            except:
                if line[-1] != '\n':
                    line += '\n'

                rpevent = {
                    '_raw': line,
                    'index': self._sample.index,
                    'host': self._sample.host,
                    'hostRegex': self._sample.hostRegex,
                    'source': self._sample.source,
                    'sourcetype': self._sample.sourcetype
                }

            # If timestamp doesn't exist, the sample file should be fixed to include timestamp for every event.
            # Three-level fallback: configured timeField on the built event,
            # then on the raw line, then the literal "_time" field.
            try:
                current_event_timestamp = self._sample.getTSFromEvent(
                    rpevent[self._sample.timeField])
            except Exception:
                try:
                    current_event_timestamp = self._sample.getTSFromEvent(
                        line[self._sample.timeField])
                except Exception:
                    try:
                        logger.error(
                            "Sample timeField {} failed to locate. Trying to locate _time field."
                            .format(self._sample.timeField))
                        current_event_timestamp = self._sample.getTSFromEvent(
                            line["_time"])
                    except Exception:
                        logger.exception(
                            "Extracting timestamp from an event failed.")
                        # No usable timestamp: skip this event entirely
                        continue

            # Always flush the first event
            if previous_event is None:
                previous_event = rpevent
                previous_event_timestamp = current_event_timestamp
                self.set_time_and_send(rpevent, self.backfill_time, earliest,
                                       latest)
                continue

            # Refer to the last event to calculate the new backfill time
            time_difference = datetime.timedelta(
                seconds=(current_event_timestamp -
                         previous_event_timestamp).total_seconds() *
                self._sample.timeMultiple)

            # Once the replayed clock catches up with real time, sleep out the
            # remaining gap (only while actively backfilling) and pin
            # backfill_time to current_time; otherwise just advance it.
            if self.backfill_time + time_difference >= self.current_time:
                sleep_time = time_difference - (self.current_time -
                                                self.backfill_time)
                if self._sample.backfill and not self._sample.backfilldone:
                    time.sleep(sleep_time.seconds)
                self.current_time += sleep_time
                self.backfill_time = self.current_time
            else:
                self.backfill_time += time_difference
            previous_event = rpevent
            previous_event_timestamp = current_event_timestamp
            self.set_time_and_send(rpevent, self.backfill_time, earliest,
                                   latest)

        self._out.flush(endOfInterval=True)
        return
Example #7
0
    def load_sample_file(self):
        """Load the sample, normalize every event into an rpevent dict with a
        'base_time' timestamp, then compute the inter-event delta for each.

        :returns: list of rpevent dicts, each carrying 'base_time' (extracted
            timestamp) and 'timediff' (gap to the previous event, scaled by
            timeMultiple; zero for the first event). Events whose timestamp
            cannot be extracted are skipped.
        """
        line_list = []
        for line in self._sample.get_loaded_sample():
            # Add newline to a raw line if necessary.
            # Samples may yield dicts (with _raw/index/host/... keys) or plain
            # strings; the except branch handles the plain-string case.
            try:
                # Guard against an empty _raw so [-1] cannot raise IndexError
                if line["_raw"] and line["_raw"][-1] != "\n":
                    line["_raw"] += "\n"
                index = line.get("index", self._sample.index)
                host = line.get("host", self._sample.host)
                hostRegex = line.get("hostRegex", self._sample.hostRegex)
                source = line.get("source", self._sample.source)
                sourcetype = line.get("sourcetype", self._sample.sourcetype)
                rpevent = {
                    "_raw": line["_raw"],
                    "index": index,
                    "host": host,
                    "hostRegex": hostRegex,
                    "source": source,
                    "sourcetype": sourcetype,
                }
            except (KeyError, TypeError):
                # line is a plain string: fall back to sample-level defaults.
                # Narrowed from a bare except so real errors still propagate.
                if line and line[-1] != "\n":
                    line += "\n"

                rpevent = {
                    "_raw": line,
                    "index": self._sample.index,
                    "host": self._sample.host,
                    "hostRegex": self._sample.hostRegex,
                    "source": self._sample.source,
                    "sourcetype": self._sample.sourcetype,
                }
            # Three-level fallback: configured timeField on the built event,
            # then on the raw line, then the literal "_time" field.
            try:
                current_event_timestamp = self._sample.getTSFromEvent(
                    rpevent[self._sample.timeField])
                rpevent["base_time"] = current_event_timestamp
            except Exception:
                try:
                    current_event_timestamp = self._sample.getTSFromEvent(
                        line[self._sample.timeField])
                    rpevent["base_time"] = current_event_timestamp
                except Exception:
                    try:
                        logger.error(
                            "Sample timeField {} failed to locate. Trying to locate _time field."
                            .format(self._sample.timeField))
                        current_event_timestamp = self._sample.getTSFromEvent(
                            line["_time"])
                    except Exception:
                        logger.exception(
                            "Extracting timestamp from an event failed.")
                        # No usable timestamp: drop this event
                        continue
            line_list.append(rpevent)
        # Now iterate the list once and compute the time delta of every event
        # relative to its predecessor (the first event's delta is zero because
        # it is compared with itself).
        for index, line in enumerate(line_list):
            current_event = line
            previous_event = line_list[index - 1] if index > 0 else current_event
            # Refer to the last event to calculate the new backfill time
            time_difference = (
                current_event["base_time"] -
                previous_event["base_time"]) * self._sample.timeMultiple
            current_event["timediff"] = time_difference
        return line_list
Example #8
0
def parse_args():
    """Parse command line arguments for the Eventgen CLI.

    Builds the top-level parser plus the generate/build/service/help
    subcommands, then post-processes the parsed namespace (help dispatch,
    build destination warning, Splunk-app configfile path expansion).

    :returns: the parsed argparse.Namespace
    :raises SystemExit: for --version, help output, and usage errors
    """
    subparser_dict = {}
    parser = argparse.ArgumentParser(
        prog="Eventgen", description="Splunk Event Generation Tool")
    parser.add_argument("-v",
                        "--verbosity",
                        action="count",
                        help="increase output verbosity")
    # action="version" prints and exits inside parse_args(), so the returned
    # namespace never carries a truthy version flag.
    parser.add_argument(
        "--version",
        action="version",
        default=False,
        version="%(prog)s " + EVENTGEN_VERSION,
    )
    parser.add_argument("--modinput-mode", default=False)
    parser.add_argument("--counter-output", action="store_true", default=False)
    subparsers = parser.add_subparsers(title="commands",
                                       help="valid subcommands",
                                       dest="subcommand")
    # Generate subparser
    generate_subparser = subparsers.add_parser(
        "generate", help="Generate events using a supplied config file")
    generate_subparser.add_argument(
        "configfile",
        help=
        "Location of eventgen.conf, app folder, or name of an app in $SPLUNK_HOME/etc/apps to run",
    )
    generate_subparser.add_argument(
        "-s",
        "--sample",
        help="Run specified sample only, disabling all other samples")
    generate_subparser.add_argument(
        "--keepoutput",
        action="store_true",
        help="Keep original outputMode for the sample",
    )
    generate_subparser.add_argument("--devnull",
                                    action="store_true",
                                    help="Set outputMode to devnull")
    generate_subparser.add_argument(
        "--modinput",
        action="store_true",
        help="Set outputMode to modinput, to see metadata",
    )
    generate_subparser.add_argument("-c",
                                    "--count",
                                    type=int,
                                    help="Set sample count")
    generate_subparser.add_argument("-i",
                                    "--interval",
                                    type=int,
                                    help="Set sample interval")
    generate_subparser.add_argument("-b",
                                    "--backfill",
                                    help="Set time to backfill from")
    generate_subparser.add_argument(
        "-e",
        "--end",
        help="Set time to end generation at or a number of intervals to run",
    )
    generate_subparser.add_argument(
        "--generators", type=int, help="Number of GeneratorWorkers (mappers)")
    generate_subparser.add_argument("--outputters",
                                    type=int,
                                    help="Number of OutputWorkers (reducers)")
    generate_subparser.add_argument("--disableOutputQueue",
                                    action="store_true",
                                    help="Disable reducer step")
    generate_subparser.add_argument(
        "--multiprocess",
        action="store_true",
        help="Use multiprocesing instead of threading",
    )
    generate_subparser.add_argument("--profiler",
                                    action="store_true",
                                    help="Turn on cProfiler")
    generate_subparser.add_argument(
        "--generator-queue-size",
        type=int,
        default=500,
        help="the max queue size for the "
        "generator queue, timer object puts all the generator tasks into this queue, default max size is 500",
    )
    generate_subparser.add_argument("--disable-logging",
                                    action="store_true",
                                    help="disable logging")
    # Build subparser
    build_subparser = subparsers.add_parser(
        "build", help="Will build different forms of sa-eventgen")
    build_subparser.add_argument(
        "--mode",
        type=str,
        default="splunk-app",
        help=
        "Specify what type of package to build, defaults to splunk-app mode.",
    )
    build_subparser.add_argument(
        "--destination",
        help="Specify where to store the output of the build command.")
    build_subparser.add_argument(
        "--remove",
        default=True,
        help="Remove the build directory after completion. Defaults to True",
    )
    # Service subparser
    service_subparser = subparsers.add_parser(
        "service",
        help=
        ("Run Eventgen as an api server. Parameters for starting this service can be defined as either env"
         "variables or CLI arguments, where env variables takes precedence. See help for more info."
         ),
    )
    service_subparser.add_argument(
        "--role",
        "-r",
        type=str,
        default=None,
        required=True,
        choices=["controller", "server", "standalone"],
        help=
        "Define the role for this Eventgen node. Options: controller, server, standalone",
    )
    service_subparser.add_argument("--redis-host",
                                   type=str,
                                   default="127.0.0.1",
                                   help="Redis Host")
    service_subparser.add_argument("--redis-port",
                                   type=str,
                                   default="6379",
                                   help="Redis Port")
    service_subparser.add_argument(
        "--web-server-port",
        type=str,
        default="9500",
        help="Port you want to run a web server on",
    )
    service_subparser.add_argument(
        "--multithread",
        action="store_true",
        help="Use multi-thread instead of multi-process",
    )
    # Register subparsers BEFORE building the help subcommand so that helpstr
    # lists every valid command and "help <command>" can find each of them.
    # (Previously the dict was populated afterwards, so helpstr only listed
    # "help" and the service subparser was never registered at all.)
    subparser_dict["generate"] = generate_subparser
    subparser_dict["build"] = build_subparser
    subparser_dict["service"] = service_subparser
    # Help subparser
    # NOTE: Keep this at the end so we can use the subparser_dict.keys() to display valid commands
    help_subparser = subparsers.add_parser(
        "help", help="Display usage on a subcommand")
    helpstr = "Help on a specific command, valid commands are: " + ", ".join(
        list(subparser_dict.keys()) + ["help"])
    help_subparser.add_argument("command",
                                nargs="?",
                                default="default",
                                help=helpstr)
    subparser_dict["help"] = help_subparser

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(2)

    args = parser.parse_args()

    if "subcommand" not in args:
        parser.print_help()
        sys.exit(2)

    # argparse already enforces required=True for --role; this is a belt-and-
    # braces check kept from the original behavior.
    if args.subcommand == "service":
        if not args.role:
            msg = "Role is undefined. Please specify a role for this Eventgen service using --role/-r."
            logger.exception(msg)
            raise Exception(msg)

    if args.subcommand == "help" and args.command == "default":
        parser.print_help()
        sys.exit(0)

    if args.subcommand == "help":
        if args.command in list(subparser_dict.keys()):
            subparser_dict[args.command].print_help()
        else:
            parser.print_help()
        sys.exit(0)
    elif args.subcommand == "build" and not args.destination:
        print(
            "No destination passed for storing output file, attempting to use the current working dir."
        )

    # Allow passing of a Splunk app on the command line and expand the full path before passing up the chain
    if hasattr(args, "configfile") and not os.path.exists(args.configfile):
        if "SPLUNK_HOME" in os.environ:
            if os.path.isdir(
                    os.path.join(os.environ["SPLUNK_HOME"], "etc", "apps",
                                 args.configfile)):
                args.configfile = os.path.join(os.environ["SPLUNK_HOME"],
                                               "etc", "apps", args.configfile)
        else:
            args.configfile = None
    return args
Example #9
0
    def real_run(self):
        """
        Worker function of the Timer class.  Determine whether a plugin is queueable, and either
        place an item in the generator queue for that plugin or call the plugin's gen method directly.

        The value of self.end controls how long the loop runs:
        0 -> generate nothing; -1 -> run until stopped externally; N > 0 ->
        stop after N executions, or at self.endts when that is set.
        """
        # Honor a configured startup delay before any generation happens
        if self.sample.delay > 0:
            logger.info("Sample set to delay %s, sleeping." %
                        self.sample.delay)
            time.sleep(self.sample.delay)

        logger.debug("Timer creating plugin for '%s'" % self.sample.name)
        local_time = datetime.datetime.now()
        end = False
        raw_event_size = self.predict_event_size()
        if self.end:
            if int(self.end) == 0:
                logger.info(
                    "End = 0, no events will be generated for sample '%s'" %
                    self.sample.name)
                end = True
            elif int(self.end) == -1:
                logger.info(
                    "End is set to -1. Will be running without stopping for sample %s"
                    % self.sample.name)
        while not end:
            try:
                # Need to be able to stop threads by the main thread or this thread. self.config will stop all threads
                # referenced in the config object, while, self.stopping will only stop this one.
                if self.config.stopping or self.stopping:
                    end = True
                self.rater.update_options(
                    config=self.config,
                    sample=self.sample,
                    generatorQueue=self.generatorQueue,
                    outputQueue=self.outputQueue,
                    outputPlugin=self.outputPlugin,
                    generatorPlugin=self.generatorPlugin,
                )
                count = self.rater.rate()
                # First run of the generator, see if we have any backfill work to do.
                # countdown <= 0 means this interval is due for generation.
                if self.countdown <= 0:
                    if self.sample.backfill and not self.sample.backfilldone:
                        self.backrater.update_options(
                            config=self.config,
                            sample=self.sample,
                            generatorQueue=self.generatorQueue,
                            outputQueue=self.outputQueue,
                            outputPlugin=self.outputPlugin,
                            generatorPlugin=self.generatorPlugin,
                            samplerater=self.rater,
                        )
                        self.backrater.queue_it(count)
                    else:
                        # perdayvolumegenerator gets an extra rating pass that
                        # accounts for the predicted raw event size
                        if self.sample.generator == "perdayvolumegenerator":
                            self.perdayrater.update_options(
                                config=self.config,
                                sample=self.sample,
                                generatorQueue=self.generatorQueue,
                                outputQueue=self.outputQueue,
                                outputPlugin=self.outputPlugin,
                                generatorPlugin=self.generatorPlugin,
                                samplerater=self.rater,
                                raweventsize=raw_event_size,
                            )
                            self.perdayrater.rate()
                            self.perdayrater.queue_it(count)
                        self.rater.queue_it(count)
                    self.countdown = self.interval
                    self.executions += 1

            except Exception as e:
                # Log and keep the timer alive unless we were asked to stop
                logger.exception(str(e))
                if self.stopping:
                    end = True
                pass

            # Sleep until we're supposed to wake up and generate more events
            if self.countdown == 0:
                self.countdown = self.interval

            # 8/20/15 CS Adding support for ending generation at a certain time

            if self.end:
                if int(self.end) == -1:
                    time.sleep(self.time)
                    self.countdown -= self.time
                    continue
                # 3/16/16 CS Adding support for ending on a number of executions instead of time
                # Should be fine with storing state in this sample object since each sample has it's own unique
                # timer thread
                if not self.endts:
                    if self.executions >= int(self.end):
                        logger.info(
                            "End executions %d reached, ending generation of sample '%s'"
                            % (int(self.end), self.sample.name))
                        self.stopping = True
                        end = True
                elif local_time >= self.endts:
                    logger.info(
                        "End Time '%s' reached, ending generation of sample '%s'"
                        % (self.sample.endts, self.sample.name))
                    self.stopping = True
                    end = True

            time.sleep(self.time)
            self.countdown -= self.time
Example #10
0
    def gen(self, count, earliest, latest, samplename=None):
        # TODO: Figure out how to gracefully tell generator plugins to exit when there is an error.
        try:
            from jinja2 import Environment, FileSystemLoader
            self.target_count = count
            # assume that if there is no "count" field, we want to run 1 time, and only one time.
            if self.target_count == -1:
                self.target_count = 1
            self.earliest = earliest
            self.latest = latest
            if hasattr(self._sample, "jinja_count_type"):
                if self._sample.jinja_count_type in [
                        "line_count", "cycles", "perDayVolume"
                ]:
                    self.jinja_count_type = self._sample.jinja_count_type
            startTime = datetime.datetime.now()

            # if eventgen is running as Splunk app the configfile is None
            sample_dir = self._sample.sampleDir
            if self._sample.splunkEmbedded is True:
                splunk_home = os.environ["SPLUNK_HOME"]
                app_name = getattr(self._sample, 'app', 'SA-Eventgen')
                sample_dir = os.path.join(splunk_home, 'etc', 'apps', app_name,
                                          'samples')

            if not hasattr(self._sample, "jinja_template_dir"):
                template_dir = 'templates'
            else:
                template_dir = self._sample.jinja_template_dir

            if not os.path.isabs(template_dir):
                target_template_dir = os.path.join(sample_dir, template_dir)
            else:
                target_template_dir = template_dir
            logger.info('set jinja template path to %s', target_template_dir)

            if not hasattr(self._sample, "jinja_target_template"):
                raise CantFindTemplate(
                    "Template to load not specified in eventgen conf for stanza.  Skipping Stanza"
                )
            jinja_env = Environment(loader=FileSystemLoader(
                [target_template_dir], encoding='utf-8', followlinks=False),
                                    extensions=[
                                        'jinja2.ext.do', 'jinja2.ext.with_',
                                        'jinja2.ext.loopcontrols', JinjaTime
                                    ],
                                    line_statement_prefix="#",
                                    line_comment_prefix="##")

            jinja_loaded_template = jinja_env.get_template(
                str(self._sample.jinja_target_template))
            if hasattr(self._sample, 'jinja_variables'):
                jinja_loaded_vars = json.loads(self._sample.jinja_variables)
            else:
                jinja_loaded_vars = None
            # make the default generator vars accessable to jinja
            jinja_loaded_vars["eventgen_count"] = self.current_count
            jinja_loaded_vars["eventgen_maxcount"] = self.target_count
            jinja_loaded_vars["eventgen_earliest"] = self.earliest
            self.earliest_epoch = (
                self.earliest - datetime.datetime(1970, 1, 1)).total_seconds()
            jinja_loaded_vars["eventgen_earliest_epoch"] = self.earliest_epoch
            jinja_loaded_vars["eventgen_latest"] = self.latest
            jinja_loaded_vars["eventgen_latest_epoch"] = (
                self.latest - datetime.datetime(1970, 1, 1)).total_seconds()
            self.latest_epoch = (
                self.latest - datetime.datetime(1970, 1, 1)).total_seconds()
            while self.current_count < self.target_count:
                self.end_of_cycle = False
                jinja_loaded_vars["eventgen_count"] = self.current_count
                jinja_loaded_vars["eventgen_target_time_earliest"], jinja_loaded_vars["eventgen_target_time_latest"], \
                    jinja_loaded_vars["eventgen_target_time_slice_size"], \
                    jinja_loaded_vars["eventgen_target_time_epoch"] = \
                    JinjaTime._get_time_slice(self.earliest_epoch, self.latest_epoch, self.target_count,
                                              self.current_count, slice_type="random")
                self.jinja_stream = jinja_loaded_template.stream(
                    jinja_loaded_vars)
                lines_out = []
                try:
                    for raw_line in self.jinja_stream:
                        # trim the newline char for jinja output
                        # it is quite normal to output empty newlines in jinja
                        line = raw_line.strip()
                        if line:
                            # TODO: Time can be supported by self._sample.timestamp, should probably set that up here.
                            try:
                                target_line = json.loads(line)
                            except ValueError as e:
                                logger.error(
                                    "Unable to parse Jinja's return.  Line: {0}"
                                    .format(line))
                                logger.error(
                                    "Parse Failure Reason: {0}".format(
                                        e.message))
                                logger.error(
                                    "Please note, you must meet the requirements for json.loads in python if you have"
                                    +
                                    "not installed ujson. Native python does not support multi-line events."
                                )
                                continue
                            current_line_keys = list(target_line.keys())
                            if "_time" not in current_line_keys:
                                # TODO: Add a custom exception here
                                raise Exception(
                                    "No _time field supplied, please add time to your jinja template."
                                )
                            if "_raw" not in current_line_keys:
                                # TODO: Add a custom exception here
                                raise Exception(
                                    "No _raw field supplied, please add time to your jinja template."
                                )
                            if "host" not in current_line_keys:
                                target_line["host"] = self._sample.host
                            if "hostRegex" not in current_line_keys:
                                target_line[
                                    "hostRegex"] = self._sample.hostRegex
                            if "source" not in current_line_keys:
                                target_line["source"] = self._sample.source
                            if "sourcetype" not in current_line_keys:
                                target_line[
                                    "sourcetype"] = self._sample.sourcetype
                            if "index" not in current_line_keys:
                                target_line["index"] = self._sample.index
                            lines_out.append(target_line)
                except TypeError as e:
                    logger.exception(str(e))
                self.end_of_cycle = True
                self._increment_count(lines_out)
                self._out.bulksend(lines_out)
            endTime = datetime.datetime.now()
            timeDiff = endTime - startTime
            timeDiffFrac = "%d.%06d" % (timeDiff.seconds,
                                        timeDiff.microseconds)
            logger.debug("Interval complete, flushing feed")
            self._out.flush(endOfInterval=True)
            logger.info("Generation of sample '%s' completed in %s seconds." %
                        (self._sample.name, timeDiffFrac))
            return 0
        except Exception as e:
            logger.exception(str(e))
            return 1
Beispiel #11
0
    def real_run(self):
        """
        Worker function of the Timer class.  Determine whether a plugin is queueable, and either
        place an item in the generator queue for that plugin or call the plugin's gen method directly.

        Main scheduling loop: runs until ``end`` becomes True.  Each interval
        it rates how many events to generate, dispatches generator plugins
        onto ``self.generatorQueue`` (performing one-time backfill first if
        configured), then sleeps ``self.time`` seconds at a time until
        ``self.countdown`` expires.

        Stop conditions observed in the loop:
          * global shutdown (``self.config.stopping``) or this timer's own
            ``self.stopping`` flag;
          * ``self.end`` executions reached (when ``endts`` is not set);
          * latest generation time passing ``self.endts``.
        """
        # Optional startup delay before the first generation cycle.
        if self.sample.delay > 0:
            logger.info("Sample set to delay %s, sleeping." %
                        self.sample.delay)
            time.sleep(self.sample.delay)

        logger.debug("Timer creating plugin for '%s'" % self.sample.name)

        end = False
        # Carries an undersized perdayvolume count forward so small interval
        # allocations are not dropped (see the perdayvolumegenerator branch).
        previous_count_left = 0
        raw_event_size = self.predict_event_size()
        # self.end semantics: 0 -> generate nothing; -1 -> run forever;
        # N > 0 -> stop after N executions (or at endts, checked further down).
        if self.end:
            if int(self.end) == 0:
                logger.info(
                    "End = 0, no events will be generated for sample '%s'" %
                    self.sample.name)
                end = True
            elif int(self.end) == -1:
                logger.info(
                    "End is set to -1. Will be running without stopping for sample %s"
                    % self.sample.name)
        while not end:
            # Need to be able to stop threads by the main thread or this thread. self.config will stop all threads
            # referenced in the config object, while, self.stopping will only stop this one.
            if self.config.stopping or self.stopping:
                end = True
                continue
            count = self.rater.rate()
            # First run of the generator, see if we have any backfill work to do.
            if self.countdown <= 0:
                # Backfill runs at most once per timer (guarded by backfilldone).
                if self.sample.backfill and not self.sample.backfilldone:
                    realtime = self.sample.now(realnow=True)
                    # Parse a relative-time spec such as "-15m": leading sign,
                    # digits into backfillnumber, unit letters into backfillletter.
                    if "-" in self.sample.backfill[0]:
                        mathsymbol = "-"
                    else:
                        mathsymbol = "+"
                    backfillnumber = ""
                    backfillletter = ""
                    for char in self.sample.backfill:
                        if char.isdigit():
                            backfillnumber += char
                        elif char != "-":
                            backfillletter += char
                    backfillearliest = timeParserTimeMath(plusminus=mathsymbol,
                                                          num=backfillnumber,
                                                          unit=backfillletter,
                                                          ret=realtime)
                    # Walk forward interval by interval until we catch up to "now",
                    # dispatching one generator plugin per interval slice [et, lt).
                    while backfillearliest < realtime:
                        if self.end and self.executions == int(self.end):
                            logger.info(
                                "End executions %d reached, ending generation of sample '%s'"
                                % (int(self.end), self.sample.name))
                            break
                        et = backfillearliest
                        lt = timeParserTimeMath(plusminus="+",
                                                num=self.interval,
                                                unit="s",
                                                ret=et)
                        # Shallow-copy the sample but deep-copy its tokens so each
                        # plugin can mutate token state independently.
                        copy_sample = copy.copy(self.sample)
                        tokens = copy.deepcopy(self.sample.tokens)
                        copy_sample.tokens = tokens
                        genPlugin = self.generatorPlugin(sample=copy_sample)
                        # need to make sure we set the queue right if we're using multiprocessing or thread modes
                        genPlugin.updateConfig(config=self.config,
                                               outqueue=self.outputQueue)
                        genPlugin.updateCounts(count=count,
                                               start_time=et,
                                               end_time=lt)
                        try:
                            # Blocking put with a 3-second timeout; raises Full on timeout.
                            self.generatorQueue.put(genPlugin, True, 3)
                            self.executions += 1
                            backfillearliest = lt
                        except Full:
                            # Queue saturated: keep backfillearliest at et so this
                            # slice is retried on the next pass of the while loop.
                            logger.warning(
                                "Generator Queue Full. Reput the backfill generator task later. %d backfill generators are dispatched.",
                                self.executions)
                            backfillearliest = et
                        # Refresh "now" so the catch-up condition tracks wall clock.
                        realtime = self.sample.now(realnow=True)

                    self.sample.backfilldone = True
                else:
                    # 12/15/13 CS Moving the rating to a separate plugin architecture
                    # Save previous interval count left to avoid perdayvolumegenerator drop small tasks
                    if self.sample.generator == 'perdayvolumegenerator':
                        count = self.rater.rate() + previous_count_left
                        # If the accumulated allocation is still smaller than one raw
                        # event, defer it to the next interval instead of dropping it.
                        if 0 < count < raw_event_size:
                            logger.info(
                                "current interval size is {}, which is smaller than a raw event size {}."
                                .format(count, raw_event_size) +
                                "Wait for the next turn.")
                            previous_count_left = count
                            self.countdown = self.interval
                            self.executions += 1
                            continue
                        else:
                            previous_count_left = 0
                    else:
                        count = self.rater.rate()

                    et = self.sample.earliestTime()
                    lt = self.sample.latestTime()

                    try:
                        # count == -1 means "unlimited"; any other value < 1 means
                        # nothing to generate this interval.
                        if count < 1 and count != -1:
                            logger.info(
                                "There is no data to be generated in worker {0} because the count is {1}."
                                .format(self.sample.config.generatorWorkers,
                                        count))
                        else:
                            # Spawn workers at the beginning of job rather than wait for next interval
                            logger.info(
                                "Starting '%d' generatorWorkers for sample '%s'"
                                % (self.sample.config.generatorWorkers,
                                   self.sample.name))
                            # One plugin instance per configured worker; each gets its
                            # own sample copy with independently deep-copied tokens.
                            for worker_id in range(
                                    self.config.generatorWorkers):
                                copy_sample = copy.copy(self.sample)
                                tokens = copy.deepcopy(self.sample.tokens)
                                copy_sample.tokens = tokens
                                genPlugin = self.generatorPlugin(
                                    sample=copy_sample)
                                # Adjust queue for threading mode
                                genPlugin.updateConfig(
                                    config=self.config,
                                    outqueue=self.outputQueue)
                                genPlugin.updateCounts(count=count,
                                                       start_time=et,
                                                       end_time=lt)

                                try:
                                    # NOTE(review): non-blocking semantics here depend
                                    # on the queue type; a plain Queue.put() with no
                                    # timeout blocks rather than raising Full — confirm.
                                    self.generatorQueue.put(genPlugin)
                                    logger.debug((
                                        "Worker# {0}: Put {1} MB of events in queue for sample '{2}'"
                                        + "with et '{3}' and lt '{4}'").format(
                                            worker_id,
                                            round((count / 1024.0 / 1024), 4),
                                            self.sample.name, et, lt))
                                except Full:
                                    logger.warning(
                                        "Generator Queue Full. Skipping current generation."
                                    )
                            # One execution counted per interval, not per worker.
                            self.executions += 1
                    except Exception as e:
                        logger.exception(str(e))
                        if self.stopping:
                            end = True
                        pass

                # Sleep until we're supposed to wake up and generate more events
                self.countdown = self.interval

                # 8/20/15 CS Adding support for ending generation at a certain time

                if self.end:
                    if int(self.end) == -1:
                        # Run-forever mode: just sleep one tick and loop again.
                        time.sleep(self.time)
                        self.countdown -= self.time
                        continue
                    # 3/16/16 CS Adding support for ending on a number of executions instead of time
                    # Should be fine with storing state in this sample object since each sample has it's own unique
                    # timer thread
                    if not self.endts:
                        if self.executions >= int(self.end):
                            logger.info(
                                "End executions %d reached, ending generation of sample '%s'"
                                % (int(self.end), self.sample.name))
                            self.stopping = True
                            end = True
                    elif lt >= self.endts:
                        logger.info(
                            "End Time '%s' reached, ending generation of sample '%s'"
                            % (self.sample.endts, self.sample.name))
                        self.stopping = True
                        end = True

            else:
                # Countdown still running: sleep one tick and decrement.
                time.sleep(self.time)
                self.countdown -= self.time