Beispiel #1
0
 def _transmitEvents(self, payloadstring):
     """
     Post ``payloadstring`` to one or all servers in ``self.serverPool``.

     When ``self.httpeventoutputmode`` is "mirror" every server in the pool receives the payload;
     otherwise a single server is picked with ``random.choice``.  Whatever ``self.session.post``
     returns is appended to ``self.active_sessions``.

     :param payloadstring: body passed as ``data`` to ``self.session.post``
     :raises Exception: any exception raised while posting is logged and re-raised
     """
     logger.debug("Transmission called with payloadstring: %s " %
                  payloadstring)
     if self.httpeventoutputmode == "mirror":
         destinations = self.serverPool
     else:
         destinations = [random.choice(self.serverPool)]
     for endpoint in destinations:
         # The complete destination list is logged here, not only the current entry.
         logger.debug("Selected targetServer object: %s" % destinations)
         url = endpoint["url"]
         headers = {
             "Authorization": endpoint["header"],
             "content-type": "application/json",
         }
         try:
             payloadsize = len(payloadstring)
             # Certificate verification is switched off for this request (verify=False).
             self.active_sessions.append(
                 self.session.post(url=url, data=payloadstring, headers=headers, verify=False))
         except Exception as e:
             logger.error("Failed for exception: %s" % e)
             summary = "Failed sending events to url: %s  sourcetype: %s  size: %s" % (
                 url, self.lastsourcetype, payloadsize)
             logger.error(summary)
             detail = "Failed sending events to url: %s  headers: %s payload: %s" % (
                 url, headers, payloadstring)
             logger.debug(detail)
             raise e
Beispiel #2
0
 def __init__(self, time, sample=None, config=None, genqueue=None, outputqueue=None, loggingqueue=None):
     """
     Initialise timer state and, when a sample is given, resolve its rater, generator and output plugins.

     :param time: stored unchanged as ``self.time``
     :param sample: sample this timer drives; when ``None`` no plugins are loaded
     :param config: configuration object; ``profiler`` and ``interval`` are read from it unconditionally
     :param genqueue: stored as ``self.generatorQueue``
     :param outputqueue: stored as ``self.outputQueue``
     :param loggingqueue: accepted but not used in this method
     """
     # Logger already setup by config, just get an instance
     # setup default options
     self.profiler = config.profiler
     self.config = config
     self.sample = sample
     self.end = getattr(self.sample, "end", -1)
     self.endts = getattr(self.sample, "endts", None)
     self.generatorQueue = genqueue
     self.outputQueue = outputqueue
     self.time = time
     self.stopping = False
     self.countdown = 0
     self.executions = 0
     self.interval = getattr(self.sample, "interval", config.interval)
     # Parenthesised so that "%" formats the chosen name; without the parentheses a missing
     # sample produced only the bare word "None" instead of the full message.
     logger.debug('Initializing timer for %s' % (sample.name if sample is not None else "None"))
     # load plugins
     if self.sample is not None:
         rater_class = self.config.getPlugin('rater.' + self.sample.rater, self.sample)
         self.rater = rater_class(self.sample)
         self.generatorPlugin = self.config.getPlugin('generator.' + self.sample.generator, self.sample)
         self.outputPlugin = self.config.getPlugin('output.' + self.sample.outputMode, self.sample)
         if self.sample.timeMultiple < 0:
             logger.error("Invalid setting for timeMultiple: {}, value should be positive".format(
                 self.sample.timeMultiple))
         elif self.sample.timeMultiple != 1:
             # NOTE(review): the message announces a "new interval" but the value assigned is the
             # unscaled sample interval - confirm whether timeMultiple is meant to be applied here.
             self.interval = self.sample.interval
             logger.debug("Adjusting interval {} with timeMultiple {}, new interval: {}".format(
                 self.sample.interval, self.sample.timeMultiple, self.interval))
         # Kept inside the guard: the message dereferences the sample, which previously raised
         # AttributeError when the timer was built without one.
         logger.info(
             "Start '%s' generatorWorkers for sample '%s'" % (self.sample.config.generatorWorkers, self.sample.name))
Beispiel #3
0
    def __init__(self, sample, output_counter=None):
        """
        Set up the awsS3 output plugin from the settings found on ``sample``.

        If boto is not importable (``boto_imported`` is false) an error is logged and
        initialisation stops right after the base-class constructor has run.

        :param sample: sample carrying the ``awsS3*`` settings
        :param output_counter: passed through to ``OutputPlugin.__init__``
        """
        # Each flush is meant to produce one S3 key, so the queue length follows the
        # per-key event count whenever that setting is truthy.
        events_per_key = sample.awsS3EventPerKey
        if events_per_key:
            sample.maxQueueLength = events_per_key

        OutputPlugin.__init__(self, sample, output_counter)

        if not boto_imported:
            logger.error("There is no boto3 or botocore library available")
            return

        # disable any "requests" warnings
        requests.packages.urllib3.disable_warnings()

        def setting(attribute, fallback):
            # A missing attribute and a falsy value both yield the fallback.
            value = getattr(sample, attribute, None)
            return value if value else fallback

        # Bind passed in samples to the outputter.
        self.awsS3compressiontype = setting("awsS3CompressionType", None)
        self.awsS3eventtype = setting("awsS3EventType", "syslog")
        self.awsS3objectprefix = setting("awsS3ObjectPrefix", "")
        self.awsS3objectsuffix = setting("awsS3ObjectSuffix", "")
        self.awsS3bucketname = sample.awsS3BucketName
        logger.debug("Setting up the connection pool for %s in %s" %
                     (self._sample.name, self._app))
        self._client = None
        self._createConnections(sample)
        logger.debug("Finished init of awsS3 plugin.")
Beispiel #4
0
    def __init__(self, sample, output_counter=None):
        """
        Set up an output plugin that writes to several numbered files.

        The base path comes from ``sample.pathParser(sample.fileName)``; ``sample.fileFiles``
        numbered variants of it are opened in append mode and their current sizes recorded.

        :param sample: sample providing ``fileName``, ``fileMaxBytes``, ``fileBackupFiles`` and ``fileFiles``
        :param output_counter: passed through to ``OutputPlugin.__init__``
        :raises ValueError: when ``sample.fileName`` is ``None``
        """
        OutputPlugin.__init__(self, sample, output_counter)

        if sample.fileName is None:
            complaint = ("outputMode file but file not specified for sample %s" %
                         self._sample.name)
            logger.error(complaint)
            raise ValueError(complaint)

        self._file = sample.pathParser(sample.fileName)
        self._fileMaxBytes = sample.fileMaxBytes
        self._fileBackupFiles = sample.fileBackupFiles
        self._fileFiles = sample.fileFiles

        def numbered(path, index):
            # Put "_<index>" in front of the last extension, or at the very end when the
            # path contains no dot at all.
            stem, dot, extension = path.rpartition(".")
            if not dot:
                return "{}_{}".format(path, index)
            return "{}_{}.{}".format(stem, index, extension)

        total = int(self._fileFiles)
        self._multifiles = [numbered(self._file, index) for index in range(total)]
        # All files are opened first; sizes are read afterwards, in the same order.
        self._fileHandles = [open(path, "a") for path in self._multifiles]
        self._fileLengths = [os.stat(path).st_size for path in self._multifiles]
Beispiel #5
0
 def renewAccessToken(self):
     """
     Request a fresh access token from ``self.tokenRenewEndPoint``.

     On HTTP 200 the ``access_token`` field of the JSON reply is stored on this object
     and on ``self._sample`` and the expired flag is cleared; any other status is only logged.
     """
     reply = requests.post(self.tokenRenewEndPoint, data=self.tokenRenewBody, timeout=5)
     if reply.status_code != 200:
         logger.error("Renewal of the access token failed")
         return
     logger.info("Renewal of the access token succesful")
     self.scsAccessToken = reply.json()["access_token"]
     # Mirror the new token onto the sample as well.
     self._sample.scsAccessToken = self.scsAccessToken
     self.accessTokenExpired = False
Beispiel #6
0
    def queue_it(self, count):
        """
        Create one generator plugin per interval of the sample's backfill window.

        The window starts at the time described by ``self.sample.backfill`` and runs up to the
        real current time.  Replay generators are run inline; all others are put on
        ``self.generatorQueue``.  Any exception is logged and swallowed.

        :param count: event count handed to each plugin's ``updateCounts``
        """
        try:
            realtime = self.sample.now(realnow=True)
            mathsymbol = "-" if "-" in self.sample.backfill[0] else "+"
            # Split the backfill spec into its digits and its remaining unit characters;
            # "-" characters belong to neither part.
            spec = self.sample.backfill
            backfillnumber = "".join(ch for ch in spec if ch.isdigit())
            backfillletter = "".join(ch for ch in spec if not ch.isdigit() and ch != "-")
            window_start = timeParserTimeMath(
                plusminus=mathsymbol,
                num=backfillnumber,
                unit=backfillletter,
                ret=realtime,
            )
            while window_start < realtime:
                window_end = timeParserTimeMath(plusminus="+",
                                                num=self.sample.interval,
                                                unit="s",
                                                ret=window_start)
                plugin = self.generatorPlugin(sample=self.sample)
                plugin.updateCounts(count=count, start_time=window_start, end_time=window_end)
                plugin.updateConfig(config=self.config, outqueue=self.outputQueue)
                try:
                    # Need to lock on replay mode since event duration is dynamic.  Interval starts
                    # counting after the replay has finished.
                    if self.sample.generator == "replay":
                        plugin.run()
                    else:
                        self.generatorQueue.put(plugin)
                except Full:
                    logger.warning(
                        "Generator Queue Full. Skipping current generation.")
                # Replays iterate in reverse and handle the whole backfill on their first run,
                # so for them the loop ends after one pass; other generators step by one interval.
                window_start = realtime if self.sample.generator == "replay" else window_end
            if self.sample.generator != "replay":
                self.sample.backfilldone = True

        except Exception as e:
            logger.error("Failed queuing backfill, exception: {0}".format(e))
Beispiel #7
0
    def processSampleLine(self, filehandler):
        """
        Split the content of ``filehandler`` into a list of raw sample "lines".

        With the default breaker (``self.breaker == self.config.breaker``) the result is simply
        ``filehandler.readlines()``.  Otherwise the whole content is read and cut at every match of
        ``self.breaker`` (compiled with ``re.M``); every chunk after the first starts with the
        breaker text that introduced it.  When the breaker cannot be compiled, ``self.breaker`` is
        reset to the default and the content is split on line boundaries instead.

        Due to a change in python3, utf-8 is now the default trying to read a file.  To get around
        this we need the process loop outside of the filehandler.

        :param filehandler: open, readable file-like object
        :return: list of chunks read from ``filehandler``
        """
        if self.breaker == self.config.breaker:
            logger.debug("Reading raw sample '%s' in app '%s'" %
                         (self.name, self.app))
            return filehandler.readlines()

        # 1/5/14 CS Moving to using only sampleDict and doing the breaking up into events at load time
        # instead of on every generation
        logger.debug(
            "Non-default breaker '%s' detected for sample '%s' in app '%s'"
            % (self.breaker, self.name, self.app))
        sampleData = filehandler.read()
        logger.debug(
            "Filling array for sample '%s' in app '%s'; sampleData=%s, breaker=%s"
            % (self.name, self.app, len(sampleData), self.breaker))
        try:
            breakerRE = re.compile(self.breaker, re.M)
        except Exception:
            logger.error(
                "Line breaker '%s' for sample '%s' in app '%s'"
                " could not be compiled; using default breaker",
                self.breaker,
                self.name,
                self.app,
            )
            self.breaker = self.config.breaker
            # There is no usable pattern to search with (the old code went on and hit a
            # NameError), so honour the "default breaker" promise by splitting per line.
            return sampleData.splitlines(True)

        # Loop through data, finding matches of the regular expression and breaking them up into
        # "lines".  Each match includes the breaker itself.
        sampleLines = []
        extractpos = 0
        datalen = len(sampleData)
        breakerMatch = breakerRE.search(sampleData, 0)
        while breakerMatch:
            start, end = breakerMatch.span()
            logger.debug("Breaker found at: %d, %d" % (start, end))
            # Ignore matches at the beginning of the file
            if start != 0:
                sampleLines.append(sampleData[extractpos:start])
                extractpos = start
            # A zero-width match (e.g. "^") would be found again at the same position forever,
            # so step one character past it before searching again.
            searchpos = end if end > start else end + 1
            if searchpos > datalen:
                break
            breakerMatch = breakerRE.search(sampleData, searchpos)
        sampleLines.append(sampleData[extractpos:])
        return sampleLines
Beispiel #8
0
 def gen(self, count, earliest, latest, samplename=None):
     """
     Replay the events of the sample file, sleeping between events by their recorded time difference.

     If a backfill window exists and has not been handled yet, events are first walked backwards
     (wrapping around the file) until the backfill start is passed and sent in one bulk call.
     Afterwards each event is sent on its own, and the output is flushed at the end.

     :param count: not used by this method
     :param earliest: forwarded to ``set_time_and_tokens``
     :param latest: forwarded to ``set_time_and_tokens``
     :param samplename: not used by this method
     """
     # 9/8/15 CS Check to make sure we have events to replay
     self._sample.loadSample()
     self.current_time = self._sample.now()
     events = self.load_sample_file()
     # If backfill exists, calculate the start of the backfill time relative to the current time.
     # Otherwise, backfill time equals to the current time
     self.backfill_time = self._sample.get_backfill_time(self.current_time)
     # if we have backfill, replay the events backwards until we hit the backfill
     if self.backfill_time != self.current_time and not self._sample.backfilldone:
         cursor_time = self.current_time
         position = len(events) - 1
         replayed = []
         while cursor_time >= self.backfill_time:
             entry = events[position]
             cursor_time = cursor_time - entry["timediff"]
             replayed.append(
                 self.set_time_and_tokens(entry, cursor_time, earliest, latest))
             # Step backwards, wrapping from the first event to the last one.
             position = (position - 1) % len(events)
         # Collected newest-first; send oldest-first.
         replayed.reverse()
         self._out.bulksend(replayed)
         self._sample.backfilldone = True
     previous_event = None
     for index, rpevent in enumerate(events):
         if previous_event is None:
             # First event: stamped with the backfill start time and sent without waiting.
             previous_event = self.set_time_and_tokens(
                 rpevent, self.backfill_time, earliest, latest)
             previous_event_timediff = rpevent["timediff"]
             self._out.bulksend([previous_event])
             continue
         try:
             time.sleep(previous_event_timediff.total_seconds())
         except ValueError:
             logger.error(
                 "Can't sleep for negative time, please make sure your events are in time order."
                 "see line Number{0}".format(index))
             logger.error("Event: {0}".format(rpevent))
         send_time = datetime.datetime.now()
         previous_event = rpevent
         previous_event_timediff = rpevent["timediff"]
         self._out.bulksend(
             [self.set_time_and_tokens(rpevent, send_time, earliest, latest)])
     self._out.flush(endOfInterval=True)
Beispiel #9
0
 def bulksend(self, msglist):
     """
     Append every item of ``msglist`` to the output buffer and flush once the buffer
     holds at least MAXQUEUELENGTH items.

     Exceptions raised while appending or flushing are logged, never propagated.
     """
     try:
         self._queue.extend(msglist)
         buffer_is_full = len(self._queue) >= self.MAXQUEUELENGTH
         if buffer_is_full:
             self.flush()
     except Exception as e:
         # We don't want to exit if there's a single bad event
         message = ("Caught Exception {} while appending/flushing output queue. There may be a "
                    "faulty event or token replacement in your sample.").format(e)
         logger.error(message)
Beispiel #10
0
 def checkResults(self):
     """
     Inspect the result of every entry in ``self.active_sessions``.

     :return: ``False`` at the first response that is not HTTP 200 (additionally setting
              ``self.accessTokenExpired`` when a 401 carries the expired-token text),
              ``True`` when every response is 200
     """
     for pending in self.active_sessions:
         reply = pending.result()
         token_rejected = (reply.status_code == 401
                           and "Invalid or Expired Bearer Token" in reply.text)
         if token_rejected:
             logger.error("scsAccessToken is invalid or expired")
             self.accessTokenExpired = True
             return False
         if reply.status_code != 200:
             logger.error(
                 f"Data transmisison failed with {reply.status_code} and {reply.text}"
             )
             return False
     logger.debug("Data transmission successful")
     return True
Beispiel #11
0
    def __init__(self, sample, output_counter=None):
        """
        Prepare the splunkstream output plugin and make sure the sample carries a session key.

        The Splunk URL parts are resolved through ``Config.getSplunkUrl``.  When
        ``self._sample.sessionKey`` is not set, a login request is posted to
        ``/services/auth/login`` with the sample's credentials and the ``sessionKey`` element
        of the XML reply is stored on the sample.

        Side effect: the process-wide default HTTPS context of the ``ssl`` module is replaced
        with an unverified one.

        :param sample: sample this plugin writes for
        :param output_counter: passed through to ``OutputPlugin.__init__``
        :raises IOError: when the login request or the parsing of its reply fails
        """
        OutputPlugin.__init__(self, sample, output_counter)

        from splunk_eventgen.lib.eventgenconfig import Config
        # The Config instance is published as module global ``c`` and used through that name below.
        globals()['c'] = Config()

        self._splunkUrl, self._splunkMethod, self._splunkHost, self._splunkPort = c.getSplunkUrl(
            self._sample)  # noqa
        self._splunkUser = self._sample.splunkUser
        self._splunkPass = self._sample.splunkPass

        # Cancel SSL verification
        import ssl
        ssl._create_default_https_context = ssl._create_unverified_context

        if not self._sample.sessionKey:
            login_url = self._splunkUrl + '/services/auth/login'
            try:
                myhttp = httplib2.Http(disable_ssl_certificate_validation=True)
                # The password is deliberately left out of the log line; it used to be
                # written in clear text.
                logger.debug(
                    "Getting session key from '%s' with user '%s'"
                    % (login_url, self._splunkUser))
                response = myhttp.request(login_url,
                                          'POST',
                                          headers={},
                                          body=urllib.parse.urlencode({
                                              'username':
                                              self._splunkUser,
                                              'password':
                                              self._splunkPass
                                          }))[1]
                self._sample.sessionKey = minidom.parseString(
                    response).getElementsByTagName(
                        'sessionKey')[0].childNodes[0].nodeValue
                # NOTE(review): the session key itself is still logged here and below -
                # consider masking it as well.
                logger.debug(
                    "Got new session for splunkstream, sessionKey '%s'" %
                    self._sample.sessionKey)
            except Exception as err:
                # ``Exception`` rather than a bare except, so KeyboardInterrupt/SystemExit are
                # no longer turned into an IOError.
                message = (
                    "Error getting session key for non-SPLUNK_EMBEEDED for sample '%s'."
                    % self._sample.name + " Credentials are missing or wrong")
                logger.error(message)
                raise IOError(message) from err

        logger.debug(
            "Retrieved session key '%s' for Splunk session for sample %s'" %
            (self._sample.sessionKey, self._sample.name))
Beispiel #12
0
 def get_backfill_time(self, current_time):
     """
     Work out the time at which the backfill window starts.

     :param current_time: reference time; when falsy, ``self.now()`` is used instead
     :return: the reference time when there is no backfill setting or the setting does not
              start with '-'; otherwise the result of ``self.get_time_difference`` for the
              amount and unit parsed from the setting (unit is 'ms' or the last character)
     """
     reference = current_time if current_time else self.now()
     spec = self.backfill
     if not spec:
         return reference
     if spec[0] != '-':
         logger.error("Backfill time is not in the past.")
         return reference
     # "ms" is the only two-character unit; every other unit is the final character.
     if spec[-2:] == 'ms':
         unit, amount = 'ms', spec[1:-2]
     else:
         unit, amount = spec[-1], spec[1:-1]
     return self.get_time_difference(current_time=reference, different_time=amount, sign='-',
                                     time_unit=unit)
Beispiel #13
0
 def flush(self, q):
     """
     Append the ``_raw`` text of every event in ``q`` to the file at ``self.spoolPath``.

     Events without a truthy ``_raw`` are left out.  A failing write is logged and retried
     every 0.1 seconds until it succeeds.  An empty ``q`` does nothing.

     :param q: sequence of event dicts
     """
     if len(q) == 0:
         return
     logger.debug(
         "Flushing output for sample '%s' in app '%s' for queue '%s'" %
         (self._sample.name, self._app, self._sample.source))
     data = ''.join([event['_raw'] for event in q if event.get('_raw')])
     # Keep trying to open destination file as it might be touched by other processes
     written = False
     while not written:
         try:
             with open(self.spoolPath, 'a') as dst:
                 dst.write(data)
             written = True
         except Exception as e:
             logger.error(str(e))
             time.sleep(0.1)
     logger.debug("Queue for app '%s' sample '%s' written" %
                  (self._app, self._sample.name))
Beispiel #14
0
 def updateConfig(self, config):
     """
     Take over the httpevent settings from ``config`` / the sample and build the server pool.

     Resolves the server list, the output mode (default 'roundrobin') and the maximum payload
     size (default 10000), then calls ``self.createConnections()``.  Every exception raised in
     the process, including the ``NoServers`` raised here, is logged and swallowed.

     :param config: configuration object, first handed to ``OutputPlugin.updateConfig``
     """
     OutputPlugin.updateConfig(self, config)
     try:
         if not hasattr(self.config, 'httpeventServers'):
             if not hasattr(self._sample, 'httpeventServers'):
                 problem = (
                     'outputMode %s but httpeventServers not specified for sample %s'
                     % (self.name, self._sample.name))
                 logger.error(problem)
                 raise NoServers(problem)
             self.config.httpeventServers = self._sample.httpeventServers
         # set default output mode to round robin
         if getattr(self.config, 'httpeventOutputMode', None):
             self.httpeventoutputmode = config.httpeventOutputMode
         elif getattr(self._sample, 'httpeventOutputMode', None):
             self.httpeventoutputmode = self._sample.httpeventOutputMode
         else:
             self.httpeventoutputmode = 'roundrobin'
         # Payload size limit: config first, then sample, then 10000.
         if getattr(self.config, 'httpeventMaxPayloadSize', None):
             self.httpeventmaxsize = self.config.httpeventMaxPayloadSize
         elif getattr(self._sample, 'httpeventMaxPayloadSize', None):
             self.httpeventmaxsize = self._sample.httpeventMaxPayloadSize
         else:
             self.httpeventmaxsize = 10000
         logger.debug("Currentmax size: %s " % self.httpeventmaxsize)
         # A string value is treated as JSON text and decoded.
         servers = config.httpeventServers
         self.httpeventServers = json.loads(servers) if isinstance(servers, str) else servers
         logger.debug("Setting up the connection pool for %s in %s" %
                      (self._sample.name, self._app))
         self.createConnections()
         logger.debug("Pool created.")
         logger.debug("Finished init of %s plugin." % self.name)
     except Exception as e:
         logger.exception(str(e))
Beispiel #15
0
 def flush(self, q):
     """
     Convert the events in ``q`` into httpevent payload fragments and send them.

     Each event with a usable ``_raw`` becomes a dict with ``event`` plus, when present,
     ``source``, ``sourcetype``, ``host``, ``time`` and ``index``.  Events without ``_raw``
     (or whose ``_raw`` is a lone newline) are logged and left out of the payload.  When
     ``self.config.httpeventWaitResponse`` is set, every entry of ``self.active_sessions`` is
     awaited and checked.  All exceptions are logged here and not propagated.

     :param q: sequence of event dicts
     """
     logger.debug("Flush called on httpevent plugin")
     self._setup_REST_workers()
     if len(q) > 0:
         try:
             payload = []
             logger.debug("Currently being called with %d events" % len(q))
             for event in q:
                 logger.debug("HTTPEvent proccessing event: %s" % event)
                 if event.get('_raw') is None or event['_raw'] == "\n":
                     logger.error('failure outputting event, does not contain _raw')
                     # Previously an empty {} fragment was still appended and sent; an entry
                     # without an "event" field carries nothing to index, so skip it.
                     continue
                 logger.debug("Event contains _raw, attempting to process...")
                 payloadFragment = {'event': event['_raw']}
                 if event.get('source'):
                     logger.debug("Event contains source, adding to httpevent event")
                     payloadFragment['source'] = event['source']
                 if event.get('sourcetype'):
                     logger.debug("Event contains sourcetype, adding to httpevent event")
                     payloadFragment['sourcetype'] = event['sourcetype']
                     self.lastsourcetype = event['sourcetype']
                 if event.get('host'):
                     logger.debug("Event contains host, adding to httpevent event")
                     payloadFragment['host'] = event['host']
                 if event.get('_time'):
                     # make sure _time can be an epoch timestamp
                     try:
                         float(event.get("_time"))
                     except (TypeError, ValueError, OverflowError):
                         # Only the timestamp is dropped; the event itself is still sent.
                         logger.error("Timestamp not in epoch format, ignoring event: {0}".format(event))
                     else:
                         logger.debug("Event contains _time, adding to httpevent event")
                         payloadFragment['time'] = event['_time']
                 if event.get('index'):
                     logger.debug("Event contains index, adding to httpevent event")
                     payloadFragment['index'] = event['index']
                 logger.debug("Full payloadFragment: %s" % json.dumps(payloadFragment))
                 payload.append(payloadFragment)
             if not payload:
                 logger.debug("No sendable events in this flush, nothing sent to splunk")
                 return
             logger.debug("Finished processing events, sending all to splunk")
             self._sendHTTPEvents(payload)
             if self.config.httpeventWaitResponse:
                 for session in self.active_sessions:
                     response = session.result()
                     # raise_for_status() returns None on success and raises on an error
                     # status, so the old "else" branch could never run; catch the error
                     # here to report the status code and raise BadConnection as intended.
                     try:
                         response.raise_for_status()
                     except Exception as err:
                         logger.error("Server returned an error while trying to send, response code: %s" %
                                      response.status_code)
                         raise BadConnection(
                             "Server returned an error while sending, response code: %s" %
                             response.status_code) from err
                     logger.debug("Payload successfully sent to httpevent server.")
             else:
                 logger.debug("Ignoring response from HTTP server, leaving httpevent outputter")
         except Exception as e:
             logger.error('failed indexing events, reason: %s ' % e)
Beispiel #16
0
 def rate(self):
     """
     Return the sample's event count scaled by ``self.adjust_rate_factor()``.

     ``self.sample.count`` is normalised to ``int`` first.  For the "default" generator a
     count of -1 is replaced by the number of entries in ``self.sample.sampleDict``.

     :return: ``count * rate factor``, rounded and converted to ``int``
     """
     sample = self.sample
     sample.count = int(sample.count)
     # Let generators handle infinite count for themselves
     if sample.count == -1 and sample.generator == "default":
         if not sample.sampleDict:
             logger.error(
                 "No sample found for default generator, cannot generate events"
             )
         sample.count = len(sample.sampleDict)
     base_count = sample.count
     factor = self.adjust_rate_factor()
     rated_count = int(round(base_count * factor, 0))
     if factor != 1.0:
         logger.debug(
             "Original count: %s Rated count: %s Rate factor: %s"
             % (base_count, rated_count, factor)
         )
     return rated_count
Beispiel #17
0
    def __init__(self, sample, output_counter=None):
        """
        Set up an output plugin that appends to a single file.

        The path comes from ``sample.pathParser(sample.fileName)``; the file is opened in
        append mode and its current size recorded.

        :param sample: sample providing ``fileName``, ``fileMaxBytes`` and ``fileBackupFiles``
        :param output_counter: passed through to ``OutputPlugin.__init__``
        :raises ValueError: when ``sample.fileName`` is ``None``
        """
        OutputPlugin.__init__(self, sample, output_counter)

        if sample.fileName is None:
            complaint = ("outputMode file but file not specified for sample %s" %
                         self._sample.name)
            logger.error(complaint)
            raise ValueError(complaint)

        self._file = sample.pathParser(sample.fileName)
        self._fileMaxBytes = sample.fileMaxBytes
        self._fileBackupFiles = sample.fileBackupFiles

        # Open first, then stat: the size is read after the append-mode open has created
        # the file if it did not exist.
        self._fileHandle = open(self._file, "a")
        self._fileLength = os.stat(self._file).st_size
        summary = (
            "Configured to log to '%s' with maxBytes '%s' with backupCount '%s'"
            % (self._file, self._fileMaxBytes, self._fileBackupFiles))
        logger.debug(summary)
Beispiel #18
0
    def createConnections(self):
        """
        Validate the configured httpevent servers and fill ``self.serverPool``.

        Every pool entry is a dict with the collector ``url`` and the ``header`` value
        ("Splunk <key>") built from one server definition.

        :raises NoServers: when ``self.httpeventServers`` is empty or unset
        :raises ValueError: when a server lacks address, port or key, or its protocol is
                            neither "http" nor "https"
        """
        self.serverPool = []
        if not self.httpeventServers:
            raise NoServers(
                "outputMode %s but httpeventServers not specified for sample %s"
                % (self.name, self._sample.name))

        def reject(message):
            # Log the problem, then abort with the same text.
            logger.error(message)
            raise ValueError(message)

        for server in self.httpeventServers.get("servers"):
            if not server.get("address"):
                reject(
                    "requested a connection to a httpevent server, but no address specified for sample %s"
                    % self._sample.name)
            if not server.get("port"):
                reject(
                    "requested a connection to a httpevent server, but no port specified for server %s"
                    % server)
            if not server.get("key"):
                reject(
                    "requested a connection to a httpevent server, but no key specified for server %s"
                    % server)
            if server.get("protocol") not in ("http", "https"):
                reject(
                    "requested a connection to a httpevent server, but no protocol specified for server %s"
                    % server)
            logger.debug(
                "Validation Passed, Creating a requests object for server: %s"
                % server.get("address"))

            collector_url = "%s://%s:%s/services/collector" % (
                server.get("protocol"),
                server.get("address"),
                server.get("port"),
            )
            setserver = {
                "url": collector_url,
                "header": "Splunk %s" % server.get("key"),
            }
            logger.debug("Adding server set to pool, server: %s" %
                         setserver)
            self.serverPool.append(setserver)
Beispiel #19
0
    def _transmitEvents(self, payloadstring):
        """
        Upload the given events as one object to the S3 bucket ``self.awsS3bucketname``.

        The events are concatenated, optionally gzip-compressed (when
        ``self.awsS3compressiontype`` is "gz") and stored under a key made of the configured
        prefix, a UTC timestamp, a uuid1-based part and the configured suffix.  The timestamp
        and uuid format depend on ``self.awsS3eventtype``.

        :param payloadstring: iterable of event strings (joined with ``"".join``)
        :raises Exception: whatever ``put_object`` raised, re-raised after logging
        """
        logger.debug(
            "Transmission called with payloadstring event number: %d " %
            len(payloadstring))
        records = "".join(payloadstring)
        # Different key prefix for different log type
        if self.awsS3eventtype == "elbaccesslog":
            s3keyname = (self.awsS3objectprefix +
                         datetime.datetime.utcnow().strftime("%Y%m%dT%H%MZ") +
                         "_" + str(uuid.uuid1()) + self.awsS3objectsuffix)
        elif self.awsS3eventtype == "s3accesslog":
            s3keyname = (
                self.awsS3objectprefix +
                datetime.datetime.utcnow().strftime("%Y-%m-%d-%H-%M-%S") +
                "-" + str(uuid.uuid1()).replace("-", "").upper()[0:15] +
                self.awsS3objectsuffix)
        else:
            s3keyname = (self.awsS3objectprefix +
                         datetime.datetime.utcnow().isoformat() +
                         str(uuid.uuid1()) + self.awsS3objectsuffix)
        # Report the number of events, not the number of characters in the joined text.
        logger.debug("Uploading %d events into s3 key: %s " %
                     (len(payloadstring), s3keyname))
        if self.awsS3compressiontype == "gz":
            import gzip

            # gzip works on bytes: the former StringIO/str combination raised TypeError on
            # Python 3, so encode the text and compress it in memory instead.
            records = gzip.compress(records.encode("utf-8"))
        try:
            response = self._client.put_object(Bucket=self.awsS3bucketname,
                                               Key=s3keyname,
                                               Body=records)
            logger.debug("response = %s" % response)
        except Exception as e:
            logger.error("Failed for exception: %s" % e)
            logger.debug("Failed sending events to payload: %s" %
                         (payloadstring))
            raise
Beispiel #20
0
    def createConnections(self):
        """
        Build ``self.serverPool`` from the servers listed in ``self.httpeventServers``.

        A server definition must provide address, port, key and a protocol of 'http' or
        'https'; it is turned into a dict holding the collector ``url`` and the ``header``
        value ("Splunk <key>").

        :raises NoServers: when ``self.httpeventServers`` is empty or unset
        :raises ValueError: on the first server definition that fails validation
        """
        self.serverPool = []
        if not self.httpeventServers:
            raise NoServers(
                'outputMode %s but httpeventServers not specified for sample %s'
                % (self.name, self._sample.name))

        for entry in self.httpeventServers.get('servers'):
            address = entry.get('address')
            port = entry.get('port')
            key = entry.get('key')
            protocol = entry.get('protocol')

            # Checks run in a fixed order: address, port, key, protocol.
            problem = None
            if not address:
                problem = (
                    'requested a connection to a httpevent server, but no address specified for sample %s'
                    % self._sample.name)
            elif not port:
                problem = (
                    'requested a connection to a httpevent server, but no port specified for server %s'
                    % entry)
            elif not key:
                problem = (
                    'requested a connection to a httpevent server, but no key specified for server %s'
                    % entry)
            elif protocol != 'http' and protocol != 'https':
                problem = (
                    'requested a connection to a httpevent server, but no protocol specified for server %s'
                    % entry)
            if problem is not None:
                logger.error(problem)
                raise ValueError(problem)

            logger.debug(
                "Validation Passed, Creating a requests object for server: %s"
                % address)

            setserver = {}
            setserver['url'] = "%s://%s:%s/services/collector" % (protocol, address, port)
            setserver['header'] = "Splunk %s" % key
            logger.debug("Adding server set to pool, server: %s" %
                         setserver)
            self.serverPool.append(setserver)
Beispiel #21
0
 def flush(self, q):
     """
     Collect the ``_raw`` text of every queued event and pass the list to
     ``self._sendPayloads``.

     Events whose ``_raw`` value is missing or None are reported at error
     level and left out of the payload.  Any exception raised while building
     or sending the payload is logged together with its traceback and then
     swallowed, so flush itself never raises for a send failure.

     :param q: sized collection of event mappings (each must support ``get``)
     """
     logger.debug("Flush called on awsS3 plugin with length %d" % len(q))
     if len(q) > 0:
         try:
             payload = []
             logger.debug("Currently being called with %d events" % len(q))
             for event in q:
                 if event.get('_raw') is None:
                     logger.error(
                         'failure outputting event, does not contain _raw')
                 else:
                     payload.append(event['_raw'])
             logger.debug(
                 "Finished processing events, sending all to AWS S3")
             self._sendPayloads(payload)
         except Exception as e:
             import traceback
             # format_exc() returns the traceback text; print_exc() would
             # return None and the log line would just read "None".
             logger.error(traceback.format_exc())
             logger.error('failed sending events, reason: %s ' % e)
Beispiel #22
0
    def _createConnections(self, sample):
        """
        Create the boto3 S3 client for ``sample`` and check that it is usable.

        When ``sample`` has both ``awsKeyId`` and ``awsSecretKey`` attributes
        they are passed to ``boto3.client`` explicitly; otherwise the client
        is created with only the region and boto3 resolves credentials on its
        own.  The client is stored on ``self._client`` and ``list_buckets()``
        is called once as a connectivity check.

        Raises:
            Exception: any error raised while creating the client is logged
                and re-raised unchanged.
            botocore.exceptions.NoCredentialsError: re-raised (after logging a
                hint) when the validation call finds no credentials.
        """
        try:
            if hasattr(sample, "awsKeyId") and hasattr(sample, "awsSecretKey"):
                self._client = boto3.client(
                    "s3",
                    region_name=sample.awsRegion,
                    aws_access_key_id=sample.awsKeyId,
                    aws_secret_access_key=sample.awsSecretKey,
                )
                if self._client is None:
                    msg = """
                    [your_eventgen_stanza]
                    awsKeyId = YOUR_ACCESS_KEY
                    awsSecretKey = YOUR_SECRET_KEY
                    """

                    # One placeholder for one argument; the previous template
                    # had two and failed with TypeError instead of logging.
                    logger.error(
                        "Failed for init boto3 client, you should define correct 'awsKeyId' "
                        "and 'awsSecretKey' in eventgen conf %s" % msg)
                    raise Exception(msg)
            else:
                self._client = boto3.client("s3", region_name=sample.awsRegion)
        except Exception as e:
            logger.error("Failed for init boto3 client: exception =  %s" % e)
            raise
        # Try list bucket method to validate if the connection works
        try:
            self._client.list_buckets()
        except botocore.exceptions.NoCredentialsError:
            msg = """
            [default]
            aws_access_key_id = YOUR_ACCESS_KEY
            aws_secret_access_key = YOUR_SECRET_KEY
            """

            logger.error("Failed for init boto3 client, you should create "
                         "'~/.aws/credentials' with credential info %s" % msg)
            raise
        logger.debug("Init conn done, conn = %s" % self._client)
Beispiel #23
0
 def rate(self):
     """
     Return the number of events this worker should generate for the interval.

     The sample's ``count`` is coerced to int (and, for the default generator
     with ``count == -1``, replaced by the number of loaded sample events),
     divided by the number of generator workers, and then scaled by:

     * a random factor when ``randomizeCount`` is set, and
     * the ``hourOfDayRate``, ``dayOfWeekRate``, ``minuteOfHourRate``,
       ``dayOfMonthRate`` and ``monthOfYearRate`` tables, whenever the
       corresponding attribute is a dict.

     A table with no entry for the current time value is logged at error
     level and skipped.  ``self._sample.count`` and ``self._generatorWorkers``
     are overwritten with their int values as a side effect.

     :return: the scaled count, rounded to the nearest int
     """
     sample = self._sample
     sample.count = int(sample.count)
     # Let generators handle infinite count for themselves
     if sample.count == -1 and sample.generator == 'default':
         if not sample.sampleDict:
             logger.error(
                 'No sample found for default generator, cannot generate events'
             )
         sample.count = len(sample.sampleDict)
     self._generatorWorkers = int(self._generatorWorkers)
     count = sample.count / self._generatorWorkers
     # 5/8/12 CS We've requested not the whole file, so we should adjust count based on
     # hourOfDay, dayOfWeek and randomizeCount configs
     rateFactor = 1.0
     if sample.randomizeCount:
         try:
             logger.debug(
                 "randomizeCount for sample '%s' in app '%s' is %s" %
                 (sample.name, sample.app, sample.randomizeCount))
             # If we say we're going to be 20% variable, then that means we
             # can be 10% high or 10% low.  Math below does that.
             # randint() needs a real int: round(x, 0) returns a float, which
             # newer Python versions reject.
             randBound = int(round(sample.randomizeCount * 1000, 0))
             rand = random.randint(0, randBound)
             randFactor = 1 + ((-((randBound / 2) - rand)) / 1000)
             logger.debug("randFactor for sample '%s' in app '%s' is %s" %
                          (sample.name, sample.app, randFactor))
             rateFactor *= randFactor
         except Exception:
             import traceback
             logger.error(
                 "Randomize count failed for sample '%s'.  Stacktrace %s" %
                 (sample.name, traceback.format_exc()))

     # (attribute holding the table, key derived from "now", failure message)
     timed_rates = (
         ("hourOfDayRate", lambda now: now.hour,
          "Hour of day rate failed for sample '%s'.  Stacktrace %s"),
         # Tables are keyed Sunday=0 .. Saturday=6, while weekday() is
         # Monday=0 .. Sunday=6, hence the shift.
         ("dayOfWeekRate", lambda now: (now.weekday() + 1) % 7,
          "Day of week rate failed for sample '%s'.  Stacktrace %s"),
         ("minuteOfHourRate", lambda now: now.minute,
          "Minute of hour rate failed for sample '%s'.  Stacktrace %s"),
         ("dayOfMonthRate", lambda now: now.day,
          "Day of Month rate for sample '%s' failed.  Stacktrace %s"),
         ("monthOfYearRate", lambda now: now.month,
          "Month Of Year rate failed for sample '%s'.  Stacktrace %s"),
     )
     for attr, key_of, failure in timed_rates:
         table = getattr(sample, attr)
         if not isinstance(table, dict):
             continue
         try:
             factor = table[str(key_of(sample.now()))]
             logger.debug("%s for sample '%s' in app '%s' is %s" %
                          (attr, sample.name, sample.app, factor))
             rateFactor *= factor
         except KeyError:
             import traceback
             logger.error(failure % (sample.name, traceback.format_exc()))

     ret = int(round(count * rateFactor, 0))
     if rateFactor != 1.0:
         logger.debug("Original count: %s Rated count: %s Rate factor: %s" %
                      (count, ret, rateFactor))
     return ret
Beispiel #24
0
    def loadSample(self):
        """
        Load the sample file into ``self.sampleLines`` and ``self.sampleDict``,
        reusing the cached copy when ``self.sampleDict`` is already populated.

        * ``sampletype == "raw"``: the file is split into events either line by
          line (default breaker) or at every match of the ``self.breaker``
          regex.  If the breaker cannot be compiled the error is logged,
          ``self.breaker`` is reset to the configured default and the file is
          read line by line.  Each event becomes a dict carrying ``_raw``
          plus this sample's index/host/source/sourcetype.
        * ``sampletype == "csv"``: rows are read with ``csv.DictReader``; rows
          without a ``_raw`` column are logged and dropped, and missing
          host/hostRegex/source/sourcetype/index columns are filled from this
          sample's settings.

        Every stored ``_raw`` value is newline-terminated.  Finally, when
        ``self.extendIndexes`` is set it is parsed once into
        ``self.index_list`` ("name:N" expands to name0..name(N-1)) and then
        cleared.
        """
        if self.sampletype == "raw":
            # 5/27/12 CS Added caching of the sample file
            if self.sampleDict is None:
                with open(self.filePath, "r") as fh:
                    breakerRE = None
                    if self.breaker != self.config.breaker:
                        logger.debug(
                            "Non-default breaker '%s' detected for sample '%s' in app '%s'"
                            % (self.breaker, self.name, self.app))
                        try:
                            breakerRE = re.compile(self.breaker, re.M)
                        except Exception:
                            logger.error(
                                "Line breaker '%s' for sample '%s' in app '%s'"
                                " could not be compiled; using default breaker",
                                self.breaker,
                                self.name,
                                self.app,
                            )
                            # Leave breakerRE as None so the default,
                            # line-based path below is really taken.
                            self.breaker = self.config.breaker

                    if breakerRE is None:
                        logger.debug("Reading raw sample '%s' in app '%s'" %
                                     (self.name, self.app))
                        self.sampleLines = fh.readlines()
                    # 1/5/14 CS Moving to using only sampleDict and doing the breaking up into events at load time
                    # instead of on every generation
                    else:
                        sampleData = fh.read()
                        self.sampleLines = []

                        logger.debug(
                            "Filling array for sample '%s' in app '%s'; sampleData=%s, breaker=%s"
                            % (self.name, self.app, len(sampleData),
                               self.breaker))

                        # Walk the breaker matches and cut the data just
                        # before each one, so every "line" starts with its
                        # breaker.  finditer() always moves forward, even for
                        # zero-width matches such as lookaheads.
                        extractpos = 0
                        for breakerMatch in breakerRE.finditer(sampleData):
                            start, end = breakerMatch.span()
                            logger.debug("Breaker found at: %d, %d" %
                                         (start, end))
                            # Ignore matches at the beginning of the file
                            if start != 0:
                                self.sampleLines.append(
                                    sampleData[extractpos:start])
                                extractpos = start
                        self.sampleLines.append(sampleData[extractpos:])

                self.sampleDict = []
                for line in self.sampleLines:
                    if line == "\n":
                        continue
                    if line and line[-1] != "\n":
                        line = line + "\n"
                    self.sampleDict.append({
                        "_raw": line,
                        "index": self.index,
                        "host": self.host,
                        "source": self.source,
                        "sourcetype": self.sourcetype,
                    })
                logger.debug(
                    "Finished creating sampleDict & sampleLines.  Len samplesLines: %d Len sampleDict: %d"
                    % (len(self.sampleLines), len(self.sampleDict)))
        elif self.sampletype == "csv":
            if self.sampleDict is None:
                with open(self.filePath, "r") as fh:
                    logger.debug("Reading csv sample '%s' in app '%s'" %
                                 (self.name, self.app))
                    self.sampleDict = []
                    self.sampleLines = []
                    # Fix to load large csv files, work with python 2.5 onwards
                    csv.field_size_limit(sys.maxsize)
                    csvReader = csv.DictReader(fh)
                    for line in csvReader:
                        if "_raw" in line:
                            # Columns present in the csv win; anything missing
                            # falls back to the conf-defined value.
                            for key in ("host", "hostRegex", "source",
                                        "sourcetype", "index"):
                                if key not in line:
                                    line[key] = getattr(self, key)
                            self.sampleDict.append(line)
                            self.sampleLines.append(line["_raw"])
                        else:
                            logger.error("Missing _raw in line '%s'" %
                                         pprint.pformat(line))

                logger.debug(
                    "Finished creating sampleDict & sampleLines for sample '%s'.  Len sampleDict: %d"
                    % (self.name, len(self.sampleDict)))

                for event in self.sampleDict:
                    raw = event["_raw"]
                    if len(raw) < 1 or raw[-1] != "\n":
                        event["_raw"] += "\n"
        if self.extendIndexes:
            try:
                for index_item in self.extendIndexes.split(","):
                    index_item = index_item.strip()
                    if ":" in index_item:
                        extend_indexes_count = int(index_item.split(":")[-1])
                        extend_indexes_prefix = index_item.split(":")[0] + "{}"
                        self.index_list.extend([
                            extend_indexes_prefix.format(_i)
                            for _i in range(extend_indexes_count)
                        ])
                    elif len(index_item):
                        self.index_list.append(index_item)
            except Exception:
                logger.error(
                    "Failed to parse extendIndexes, using index={} now.".
                    format(self.index))
                self.index_list = []
            finally:
                # only read the extendIndexes configure once.
                self.extendIndexes = None
Beispiel #25
0
    def setupBackfill(self):
        """
        Called by non-queueable plugins or by the timer to setup backfill times per config or based on a Splunk Search

        Works on ``self._sample`` in two independent steps:

        1. If ``backfill`` is set it is parsed with ``timeParser`` into
           ``backfillts``.  If ``backfillSearch`` is also set, the search is
           normalised (prefixed with "search ", suffixed with
           "| head 1 | table _time") and, when a search URL is available,
           posted to ``<url>/services/search/jobs``; a timestamp found in the
           response replaces ``backfillts``.
        2. If ``end`` is set it is interpreted as an int (number of
           executions, ``endts`` cleared) or, failing that, parsed with
           ``timeParser`` into ``endts``.

        Raises:
            Exception: whatever ``timeParser`` raises for an unparsable
                ``backfill`` or ``end`` value (logged, then re-raised).
        """
        s = self._sample

        if s.backfill is not None:
            try:
                s.backfillts = timeParser(s.backfill, timezone=s.timezone)
                logger.info("Setting up backfill of %s (%s)" %
                            (s.backfill, s.backfillts))
            except Exception as ex:
                logger.error("Failed to parse backfill '%s': %s" %
                             (s.backfill, ex))
                raise

            if s.backfillSearch is not None:
                if s.backfillSearchUrl is None:
                    try:
                        s.backfillSearchUrl = c.getSplunkUrl(s)[
                            0]  # noqa, we update c in the globals() dict
                    except ValueError:
                        logger.error(
                            "Backfill Search URL not specified for sample '%s', not running backfill search"
                            % s.name)
                if not s.backfillSearch.startswith('search'):
                    s.backfillSearch = 'search ' + s.backfillSearch
                s.backfillSearch += '| head 1 | table _time'

                if s.backfillSearchUrl is not None:
                    # NOTE(review): this writes the session key to the debug
                    # log - confirm that is acceptable.
                    logger.debug(
                        "Searching Splunk URL '%s/services/search/jobs' with search '%s' with sessionKey '%s'"
                        %
                        (s.backfillSearchUrl, s.backfillSearch, s.sessionKey))

                    # request() returns (response, content); only the body is
                    # needed here.
                    results = httplib2.Http(
                        disable_ssl_certificate_validation=True).request(
                            s.backfillSearchUrl + '/services/search/jobs',
                            'POST',
                            headers={
                                'Authorization': 'Splunk %s' % s.sessionKey
                            },
                            body=urllib.parse.urlencode({
                                'search': s.backfillSearch,
                                'earliest_time': s.backfill,
                                'exec_mode': 'oneshot'
                            }))[1]
                    try:
                        temptime = minidom.parseString(
                            results).getElementsByTagName(
                                'text')[0].childNodes[0].nodeValue
                        # logger.debug("Time returned from backfill search: %s" % temptime)
                        # Results returned look like: 2013-01-16T10:59:15.411-08:00
                        # But the offset in time can also be +, so make sure we strip that out first
                        if len(temptime) > 0:
                            if temptime.find('+') > 0:
                                temptime = temptime.split('+')[0]
                            # Keep only the first three '-' separated pieces,
                            # which drops a trailing negative UTC offset.
                            temptime = '-'.join(temptime.split('-')[0:3])
                        s.backfillts = datetime.datetime.strptime(
                            temptime, '%Y-%m-%dT%H:%M:%S.%f')
                        logger.debug(
                            "Backfill search results: '%s' value: '%s' time: '%s'"
                            %
                            (pprint.pformat(results), temptime, s.backfillts))
                    except (ExpatError, IndexError) as ex:
                        # Best effort: keep the backfillts derived from the
                        # config, but leave a trace of why the search result
                        # was not used.
                        logger.debug(
                            "Backfill search for sample '%s' returned no usable time: %s"
                            % (s.name, ex))

        if s.end is not None:
            parsed = False
            try:
                s.end = int(s.end)
                s.endts = None
                parsed = True
            except ValueError:
                logger.debug(
                    "Failed to parse end '%s' for sample '%s', treating as end time"
                    % (s.end, s.name))

            if not parsed:
                try:
                    s.endts = timeParser(s.end, timezone=s.timezone)
                    logger.info("Ending generation at %s (%s)" %
                                (s.end, s.endts))
                except Exception as ex:
                    logger.error(
                        "Failed to parse end '%s' for sample '%s' as a number of executions or an end time: %s"
                        % (s.end, s.name, ex))
                    raise
Beispiel #26
0
    def gen(self, count, earliest, latest, samplename=None):
        """
        Render the sample's Jinja template until ``count`` cycles are reached
        and send the produced events to ``self._out``.

        Every non-empty line emitted by the template must be a JSON object
        containing at least ``_time`` and ``_raw``; host, hostRegex, source,
        sourcetype and index default to the sample's settings.  Lines that are
        not valid JSON are logged and skipped.

        :param count: target count; ``-1`` is treated as a single run
        :param earliest: datetime marking the start of the generation window
        :param latest: datetime marking the end of the generation window
        :param samplename: unused by this method
        :return: 0 on success, 1 if any exception occurred (it is logged)
        """
        # TODO: Figure out how to gracefully tell generator plugins to exit when there is an error.
        try:
            from jinja2 import Environment, FileSystemLoader
            self.target_count = count
            # assume that if there is no "count" field, we want to run 1 time, and only one time.
            if self.target_count == -1:
                self.target_count = 1
            self.earliest = earliest
            self.latest = latest
            if hasattr(self._sample, "jinja_count_type"):
                if self._sample.jinja_count_type in [
                        "line_count", "cycles", "perDayVolume"
                ]:
                    self.jinja_count_type = self._sample.jinja_count_type
            startTime = datetime.datetime.now()

            # if eventgen is running as Splunk app the configfile is None
            sample_dir = self._sample.sampleDir
            if self._sample.splunkEmbedded is True:
                splunk_home = os.environ["SPLUNK_HOME"]
                app_name = getattr(self._sample, 'app', 'SA-Eventgen')
                sample_dir = os.path.join(splunk_home, 'etc', 'apps', app_name,
                                          'samples')

            if not hasattr(self._sample, "jinja_template_dir"):
                template_dir = 'templates'
            else:
                template_dir = self._sample.jinja_template_dir

            # Relative template directories are resolved against the sample dir.
            if not os.path.isabs(template_dir):
                target_template_dir = os.path.join(sample_dir, template_dir)
            else:
                target_template_dir = template_dir
            logger.info('set jinja template path to %s', target_template_dir)

            if not hasattr(self._sample, "jinja_target_template"):
                raise CantFindTemplate(
                    "Template to load not specified in eventgen conf for stanza.  Skipping Stanza"
                )
            # NOTE(review): 'jinja2.ext.with_' appears to have been removed in
            # newer Jinja2 releases - confirm against the pinned version.
            jinja_env = Environment(loader=FileSystemLoader(
                [target_template_dir], encoding='utf-8', followlinks=False),
                                    extensions=[
                                        'jinja2.ext.do', 'jinja2.ext.with_',
                                        'jinja2.ext.loopcontrols', JinjaTime
                                    ],
                                    line_statement_prefix="#",
                                    line_comment_prefix="##")

            jinja_loaded_template = jinja_env.get_template(
                str(self._sample.jinja_target_template))
            if hasattr(self._sample, 'jinja_variables'):
                jinja_loaded_vars = json.loads(self._sample.jinja_variables)
            else:
                # Must be a dict: the generator defaults are stored in it below.
                jinja_loaded_vars = {}
            # make the default generator vars accessable to jinja
            unix_epoch = datetime.datetime(1970, 1, 1)
            self.earliest_epoch = (self.earliest - unix_epoch).total_seconds()
            self.latest_epoch = (self.latest - unix_epoch).total_seconds()
            jinja_loaded_vars["eventgen_count"] = self.current_count
            jinja_loaded_vars["eventgen_maxcount"] = self.target_count
            jinja_loaded_vars["eventgen_earliest"] = self.earliest
            jinja_loaded_vars["eventgen_earliest_epoch"] = self.earliest_epoch
            jinja_loaded_vars["eventgen_latest"] = self.latest
            jinja_loaded_vars["eventgen_latest_epoch"] = self.latest_epoch
            while self.current_count < self.target_count:
                self.end_of_cycle = False
                jinja_loaded_vars["eventgen_count"] = self.current_count
                jinja_loaded_vars["eventgen_target_time_earliest"], jinja_loaded_vars["eventgen_target_time_latest"], \
                    jinja_loaded_vars["eventgen_target_time_slice_size"], \
                    jinja_loaded_vars["eventgen_target_time_epoch"] = \
                    JinjaTime._get_time_slice(self.earliest_epoch, self.latest_epoch, self.target_count,
                                              self.current_count, slice_type="random")
                self.jinja_stream = jinja_loaded_template.stream(
                    jinja_loaded_vars)
                lines_out = []
                try:
                    for raw_line in self.jinja_stream:
                        # trim the newline char for jinja output
                        # it is quite normal to output empty newlines in jinja
                        line = raw_line.strip()
                        if not line:
                            continue
                        # TODO: Time can be supported by self._sample.timestamp, should probably set that up here.
                        try:
                            target_line = json.loads(line)
                        except ValueError as e:
                            logger.error(
                                "Unable to parse Jinja's return.  Line: {0}"
                                .format(line))
                            # Python 3 exceptions have no .message attribute.
                            logger.error(
                                "Parse Failure Reason: {0}".format(str(e)))
                            logger.error(
                                "Please note, you must meet the requirements for json.loads in python if you have "
                                "not installed ujson. Native python does not support multi-line events."
                            )
                            continue
                        if "_time" not in target_line:
                            # TODO: Add a custom exception here
                            raise Exception(
                                "No _time field supplied, please add time to your jinja template."
                            )
                        if "_raw" not in target_line:
                            # TODO: Add a custom exception here
                            raise Exception(
                                "No _raw field supplied, please add _raw to your jinja template."
                            )
                        # Fill in whatever the template did not provide.
                        for key in ("host", "hostRegex", "source",
                                    "sourcetype", "index"):
                            if key not in target_line:
                                target_line[key] = getattr(self._sample, key)
                        lines_out.append(target_line)
                except TypeError as e:
                    logger.exception(str(e))
                self.end_of_cycle = True
                self._increment_count(lines_out)
                self._out.bulksend(lines_out)
            endTime = datetime.datetime.now()
            timeDiff = endTime - startTime
            timeDiffFrac = "%d.%06d" % (timeDiff.seconds,
                                        timeDiff.microseconds)
            logger.debug("Interval complete, flushing feed")
            self._out.flush(endOfInterval=True)
            logger.info("Generation of sample '%s' completed in %s seconds." %
                        (self._sample.name, timeDiffFrac))
            return 0
        except Exception as e:
            logger.exception(str(e))
            return 1
Beispiel #27
0
    def _getReplacement(self,
                        old=None,
                        earliestTime=None,
                        latestTime=None,
                        s=None,
                        pivot_timestamp=None):
        if self.replacementType == 'static':
            return self.replacement
        # This logic is done in replay.py
        elif self.replacementType == 'replaytimestamp':
            pass
        elif self.replacementType == 'timestamp':
            if s.earliest and s.latest:
                if earliestTime and latestTime:
                    if latestTime >= earliestTime:
                        if pivot_timestamp:
                            replacementTime = pivot_timestamp
                        elif s.timestamp is None:
                            minDelta = 0

                            # Compute timeDelta as total_seconds
                            td = latestTime - earliestTime
                            if not type(td) == float:
                                maxDelta = timeDelta2secs(td)
                            else:
                                maxDelta = td

                            # Get random timeDelta
                            randomDelta = datetime.timedelta(
                                seconds=random.randint(minDelta, maxDelta),
                                microseconds=random.randint(
                                    0, latestTime.microsecond
                                    if latestTime.microsecond > 0 else 999999))

                            # Compute replacmentTime
                            replacementTime = latestTime - randomDelta
                            s.timestamp = replacementTime
                        else:
                            replacementTime = s.timestamp

                        replacement = self.replacement.replace(
                            '%s',
                            str(round(time.mktime(
                                replacementTime.timetuple()))).rstrip(
                                    '0').rstrip('.'))
                        replacementTime = replacementTime.strftime(replacement)
                        # replacementTime == replacement for invalid strptime specifiers
                        if replacementTime != self.replacement.replace(
                                '%', ''):
                            return replacementTime
                        else:
                            logger.error(
                                "Invalid strptime specifier '%s' detected; will not replace"
                                % (self.replacement))
                            return old
                    # earliestTime/latestTime not proper
                    else:
                        logger.error((
                            "Earliest specifier '%s', value '%s' is greater than latest specifier '%s'"
                            + "value '%s' for sample '%s'; will not replace") %
                                     (s.earliest, earliestTime, s.latest,
                                      latestTime, s.name))
                        return old
            # earliest/latest not proper
            else:
                logger.error(
                    'Earliest or latest specifier were not set; will not replace'
                )
                return old
        elif self.replacementType in ('random', 'rated'):
            # Validations:
            if self._integerMatch is not None:
                integerMatch = self._integerMatch
            else:
                integerRE = re.compile(r'integer\[([-]?\d+):([-]?\d+)\]', re.I)
                integerMatch = integerRE.match(self.replacement)
                self._integerMatch = integerMatch

            if self._floatMatch is not None:
                floatMatch = self._floatMatch
            else:
                floatRE = re.compile(
                    r'float\[(-?\d+|-?\d+\.(\d+)):(-?\d+|-?\d+\.(\d+))\]',
                    re.I)
                floatMatch = floatRE.match(self.replacement)
                self._floatMatch = floatMatch

            if self._stringMatch is not None:
                stringMatch = self._stringMatch
            else:
                stringRE = re.compile(r'string\((\d+)\)', re.I)
                stringMatch = stringRE.match(self.replacement)
                self._stringMatch = stringMatch

            if self._hexMatch is not None:
                hexMatch = self._hexMatch
            else:
                hexRE = re.compile(r'hex\((\d+)\)', re.I)
                hexMatch = hexRE.match(self.replacement)
                self._hexMatch = hexMatch

            if self._listMatch is not None:
                listMatch = self._listMatch
            else:
                listRE = re.compile(r'list(\[[^\]]+\])', re.I)
                listMatch = listRE.match(self.replacement)
                self._listMatch = listMatch

            # Valid replacements: ipv4 | ipv6 | integer[<start>:<end>] | string(<i>)
            if self.replacement.lower() == 'ipv4':
                x = 0
                replacement = ''

                while x < 4:
                    replacement += str(random.randint(0, 255)) + '.'
                    x += 1

                replacement = replacement.strip('.')
                return replacement
            elif self.replacement.lower() == 'ipv6':
                x = 0
                replacement = ''

                while x < 8:
                    replacement += hex(random.randint(0, 65535))[2:] + ':'
                    x += 1

                replacement = replacement.strip(':')
                return replacement
            elif self.replacement.lower() == 'mac':
                x = 0
                replacement = ''

                # Give me 6 blocks of 2 hex
                while x < 6:
                    y = 0
                    while y < 2:
                        replacement += hex(random.randint(0, 15))[2:]
                        y += 1
                    replacement += ':'
                    x += 1

                replacement = replacement.strip(':')
                return replacement
            elif self.replacement.lower() == 'guid':
                return str(uuid.uuid4())
            elif integerMatch:
                startInt = int(integerMatch.group(1))
                endInt = int(integerMatch.group(2))

                if endInt >= startInt:
                    replacementInt = random.randint(startInt, endInt)
                    if self.replacementType == 'rated':
                        rateFactor = 1.0
                        if type(s.hourOfDayRate) == dict:
                            try:
                                rateFactor *= s.hourOfDayRate[str(s.now())]
                            except KeyError:
                                import traceback
                                stack = traceback.format_exc()
                                logger.error(
                                    "Hour of day rate failed for token %s.  Stacktrace %s"
                                    % stack)
                        if type(s.dayOfWeekRate) == dict:
                            try:
                                weekday = datetime.date.weekday(s.now())
                                if weekday == 6:
                                    weekday = 0
                                else:
                                    weekday += 1
                                rateFactor *= s.dayOfWeekRate[str(weekday)]
                            except KeyError:
                                import traceback
                                stack = traceback.format_exc()
                                logger.error(
                                    "Day of week rate failed.  Stacktrace %s" %
                                    stack)
                        replacementInt = int(
                            round(replacementInt * rateFactor, 0))
                    replacement = str(replacementInt)
                    return replacement
                else:
                    logger.error(
                        "Start integer %s greater than end integer %s; will not replace"
                        % (startInt, endInt))
                    return old
            elif floatMatch:
                try:
                    startFloat = float(floatMatch.group(1))
                    endFloat = float(floatMatch.group(3))

                    significance = 0
                    if floatMatch.group(2) is not None:
                        significance = len(floatMatch.group(2))

                    if endFloat >= startFloat:
                        floatret = round(random.uniform(startFloat, endFloat),
                                         significance)
                        if self.replacementType == 'rated':
                            rateFactor = 1.0
                            now = s.now()
                            if type(s.hourOfDayRate) == dict:
                                try:
                                    rateFactor *= s.hourOfDayRate[str(
                                        now.hour)]
                                except KeyError:
                                    import traceback
                                    stack = traceback.format_exc()
                                    logger.error(
                                        "Hour of day rate failed for token %s.  Stacktrace %s"
                                        % stack)
                            if type(s.dayOfWeekRate) == dict:
                                try:
                                    weekday = datetime.date.weekday(now)
                                    if weekday == 6:
                                        weekday = 0
                                    else:
                                        weekday += 1
                                    rateFactor *= s.dayOfWeekRate[str(weekday)]
                                except KeyError:
                                    import traceback
                                    stack = traceback.format_exc()
                                    logger.error(
                                        "Day of week rate failed.  Stacktrace %s"
                                        % stack)
                            floatret = round(floatret * rateFactor,
                                             significance)
                        floatret = str(floatret)
                        return floatret
                    else:
                        logger.error(
                            "Start float %s greater than end float %s; will not replace"
                            % (startFloat, endFloat))
                        return old
                except ValueError:
                    logger.error("Could not parse float[%s:%s]" %
                                 (floatMatch.group(1), floatMatch.group(4)))
                    return old
            elif stringMatch:
                strLength = int(stringMatch.group(1))
                if strLength == 0:
                    return ''
                elif strLength > 0:
                    replacement = ''
                    while len(replacement) < strLength:
                        # Generate a random ASCII between dec 33->126
                        replacement += chr(random.randint(33, 126))
                        # Practice safe strings
                        replacement = re.sub('%[0-9a-fA-F]+', '',
                                             urllib.parse.quote(replacement))

                    return replacement
                else:
                    logger.error(
                        "Length specifier %s for string replacement must be greater than 0; will not replace"
                        % (strLength))
                    return old
            elif hexMatch:
                strLength = int(hexMatch.group(1))

                replacement = ''
                hexList = [
                    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B',
                    'C', 'D', 'E', 'F'
                ]
                while len(replacement) < strLength:
                    replacement += hexList[random.randint(0, 15)]

                return replacement
            elif listMatch:
                try:
                    value = json.loads(listMatch.group(1))
                except:
                    logger.error(
                        "Could not parse json for '%s' in sample '%s'" %
                        (listMatch.group(1), s.name))
                    return old
                return random.SystemRandom().choice(value)

            else:
                logger.error(
                    "Unknown replacement value '%s' for replacementType '%s'; will not replace"
                    % (self.replacement, self.replacementType))
                return old
        elif self.replacementType in ('file', 'mvfile', 'seqfile'):
            if self._replacementFile is not None:
                replacementFile = self._replacementFile
                replacementColumn = self._replacementColumn
            else:
                try:
                    paths = self.replacement.split(':')
                    if (len(paths) == 1):
                        replacementColumn = 0
                    else:
                        try:  # When it's not a mvfile, there's no number on the end:
                            replacementColumn = int(paths[-1])
                        except (ValueError):
                            replacementColumn = 0
                    if (replacementColumn > 0):
                        # This supports having a drive-letter colon
                        replacementFile = s.pathParser(":".join(paths[0:-1]))
                    else:
                        replacementFile = s.pathParser(self.replacement)
                except ValueError:
                    logger.error(
                        "Replacement string '%s' improperly formatted. Should be /path/to/file or /path/to/file:column"
                        % self.replacement)
                    return old
                self._replacementFile = replacementFile
                self._replacementColumn = replacementColumn

            # If we've seen this file before, simply return already read results
            # This applies only if we're looking at a multivalue file and we want to
            # return the same random pick on every iteration
            if replacementColumn > 0 and replacementFile in self.mvhash:
                if replacementColumn > len(self.mvhash[replacementFile]):
                    logger.error(
                        "Index for column '%s' in replacement file '%s' is out of bounds"
                        % (replacementColumn, replacementFile))
                    return old
                else:
                    # logger.debug("Returning mvhash: %s" % self.mvhash[replacementFile][replacementColumn-1])
                    return self.mvhash[replacementFile][replacementColumn - 1]
            else:
                # Adding caching of the token file to avoid reading it every iteration
                if self._tokenfile is not None:
                    replacementLines = self._tokenfile
                # Otherwise, lets read the file and build our cached results, pick a result and return it
                else:
                    # logger.debug("replacementFile: %s replacementColumn: %s" %
                    #                   (replacementFile, replacementColumn))
                    replacementFile = os.path.abspath(replacementFile)
                    logger.debug("Normalized replacement file %s" %
                                 replacementFile)
                    if os.path.exists(replacementFile) and os.path.isfile(
                            replacementFile):
                        replacementFH = open(replacementFile, 'rU')
                        replacementLines = replacementFH.readlines()
                        replacementFH.close()

                        if len(replacementLines) == 0:
                            logger.error(
                                "Replacement file '%s' is empty; will not replace"
                                % (replacementFile))
                            return old
                        else:
                            self._tokenfile = replacementLines
                    else:
                        logger.error("File '%s' does not exist" %
                                     (replacementFile))
                        return old
                if self.replacementType == 'seqfile':
                    # pick value one by one from replacement file
                    replacement = replacementLines[
                        self._tokenfilecounter %
                        len(replacementLines)].strip()
                    self._tokenfilecounter += 1
                else:
                    # pick value randomly from replacement file
                    replacement = replacementLines[random.randint(
                        0,
                        len(replacementLines) - 1)].strip()

                if replacementColumn > 0:
                    self.mvhash[replacementFile] = replacement.split(',')

                    if replacementColumn > len(self.mvhash[replacementFile]):
                        logger.error(
                            "Index for column '%s' in replacement file '%s' is out of bounds"
                            % (replacementColumn, replacementFile))
                        return old
                    else:
                        return self.mvhash[replacementFile][replacementColumn -
                                                            1]
                else:
                    return replacement
        elif self.replacementType == 'integerid':
            temp = self.replacement
            self.replacement = str(int(self.replacement) + 1)
            return temp

        else:
            logger.error("Unknown replacementType '%s'; will not replace" %
                         self.replacementType)
            return old
Beispiel #28
0
    def gen(self, count, earliest, latest, samplename=None):
        """Replay the loaded sample, preserving the spacing between events.

        Every line of the sample is turned into an event dict and handed to
        ``self.set_time_and_send`` together with a replay timestamp. The first
        event is stamped with the initial backfill time; each later event
        advances that time by the gap between its own timestamp and the
        previous event's timestamp, scaled by ``self._sample.timeMultiple``.
        Once the replay clock catches up with ``self.current_time`` the method
        sleeps for the remaining gap, but only while a backfill is configured
        and not yet done. Events whose timestamp cannot be extracted are
        skipped.

        :param count: not used by this method.
        :param earliest: passed to ``get_random_timestamp_backfill`` and
            forwarded unchanged to ``set_time_and_send``.
        :param latest: passed to ``get_random_timestamp_backfill`` and
            forwarded unchanged to ``set_time_and_send``.
        :param samplename: not used by this method.
        :return: None. The output plugin is flushed with
            ``endOfInterval=True`` before returning.
        """
        sample = self._sample
        # 9/8/15 CS Check to make sure we have events to replay
        sample.loadSample()
        first_event_pending = True
        previous_event_timestamp = None
        self.current_time = sample.now()

        # If backfill exists, calculate the start of the backfill time relative to the current time.
        # Otherwise, backfill time equals to the current time
        self.backfill_time = sample.get_backfill_time(self.current_time)

        if not sample.backfill or sample.backfilldone:
            self.backfill_time = EventgenTimestamp.get_random_timestamp_backfill(
                earliest, latest, sample.earliest, sample.latest)

        for line in sample.get_loaded_sample():
            # Sample lines are either dicts carrying '_raw' plus optional
            # metadata overrides, or plain strings.
            try:
                raw = line['_raw']
                # endswith() instead of [-1] so an empty '_raw' does not raise.
                if not raw.endswith('\n'):
                    raw += '\n'
                    line['_raw'] = raw

                rpevent = {
                    '_raw': raw,
                    'index': line.get('index', sample.index),
                    'host': line.get('host', sample.host),
                    'hostRegex': line.get('hostRegex', sample.hostRegex),
                    'source': line.get('source', sample.source),
                    'sourcetype': line.get('sourcetype', sample.sourcetype)
                }
            except Exception:
                # Not a dict-style line: treat it as the raw text itself.
                # (Exception rather than a bare except, so KeyboardInterrupt
                # and SystemExit are not swallowed.)
                if not line.endswith('\n'):
                    line += '\n'

                rpevent = {
                    '_raw': line,
                    'index': sample.index,
                    'host': sample.host,
                    'hostRegex': sample.hostRegex,
                    'source': sample.source,
                    'sourcetype': sample.sourcetype
                }

            # If timestamp doesn't exist, the sample file should be fixed to include timestamp for every event.
            # Lookup order: built event -> original line -> original line's '_time'.
            try:
                current_event_timestamp = sample.getTSFromEvent(
                    rpevent[sample.timeField])
            except Exception:
                try:
                    current_event_timestamp = sample.getTSFromEvent(
                        line[sample.timeField])
                except Exception:
                    try:
                        logger.error(
                            "Sample timeField {} failed to locate. Trying to locate _time field."
                            .format(sample.timeField))
                        current_event_timestamp = sample.getTSFromEvent(
                            line["_time"])
                    except Exception:
                        logger.exception(
                            "Extracting timestamp from an event failed.")
                        continue

            # Always flush the first event
            if first_event_pending:
                first_event_pending = False
                previous_event_timestamp = current_event_timestamp
                self.set_time_and_send(rpevent, self.backfill_time, earliest,
                                       latest)
                continue

            # Refer to the last event to calculate the new backfill time
            time_difference = datetime.timedelta(
                seconds=(current_event_timestamp -
                         previous_event_timestamp).total_seconds() *
                sample.timeMultiple)

            if self.backfill_time + time_difference >= self.current_time:
                # The replay clock has reached "now": wait out the rest of the
                # gap. sleep_time is non-negative because of the check above.
                sleep_time = time_difference - (self.current_time -
                                                self.backfill_time)
                if sample.backfill and not sample.backfilldone:
                    # total_seconds() rather than .seconds: .seconds drops
                    # whole days and sub-second precision.
                    time.sleep(sleep_time.total_seconds())
                self.current_time += sleep_time
                self.backfill_time = self.current_time
            else:
                self.backfill_time += time_difference
            previous_event_timestamp = current_event_timestamp
            self.set_time_and_send(rpevent, self.backfill_time, earliest,
                                   latest)

        self._out.flush(endOfInterval=True)
        return
Beispiel #29
0
    def load_sample_file(self):
        """Build the replay event list and annotate each event with its time gap.

        Each line returned by ``self._sample.get_loaded_sample()`` becomes an
        event dict with ``_raw``, ``index``, ``host``, ``hostRegex``,
        ``source`` and ``sourcetype``. Dict-style lines may override the
        sample-level metadata; any other line is used as the raw text itself.
        The event's timestamp is stored under ``base_time``. Lines whose
        timestamp cannot be extracted are skipped.

        A second pass stores under ``timediff`` the difference between an
        event's ``base_time`` and the previous event's, multiplied by
        ``self._sample.timeMultiple``. The first event is compared with
        itself.

        :return: list of event dicts in sample order.
        """
        sample = self._sample
        line_list = []
        for line in sample.get_loaded_sample():
            # Add newline to a raw line if necessary
            try:
                raw = line["_raw"]
                # endswith() instead of [-1] so an empty "_raw" does not raise.
                if not raw.endswith("\n"):
                    raw += "\n"
                    line["_raw"] = raw
                rpevent = {
                    "_raw": raw,
                    "index": line.get("index", sample.index),
                    "host": line.get("host", sample.host),
                    "hostRegex": line.get("hostRegex", sample.hostRegex),
                    "source": line.get("source", sample.source),
                    "sourcetype": line.get("sourcetype", sample.sourcetype),
                }
            except Exception:
                # Not a dict-style line: treat it as the raw text itself.
                # (Exception rather than a bare except, so KeyboardInterrupt
                # and SystemExit are not swallowed.)
                if not line.endswith("\n"):
                    line += "\n"

                rpevent = {
                    "_raw": line,
                    "index": sample.index,
                    "host": sample.host,
                    "hostRegex": sample.hostRegex,
                    "source": sample.source,
                    "sourcetype": sample.sourcetype,
                }

            # Lookup order: built event -> original line -> original line's "_time".
            try:
                current_event_timestamp = sample.getTSFromEvent(
                    rpevent[sample.timeField])
            except Exception:
                try:
                    current_event_timestamp = sample.getTSFromEvent(
                        line[sample.timeField])
                except Exception:
                    try:
                        logger.error(
                            "Sample timeField {} failed to locate. Trying to locate _time field."
                            .format(sample.timeField))
                        current_event_timestamp = sample.getTSFromEvent(
                            line["_time"])
                    except Exception:
                        logger.exception(
                            "Extracting timestamp from an event failed.")
                        continue

            # Record the timestamp no matter which lookup produced it; the
            # second pass below requires "base_time" on every kept event.
            rpevent["base_time"] = current_event_timestamp
            line_list.append(rpevent)

        # now iterate the list 1 time and figure out the time delta of every event
        previous_event = None
        for current_event in line_list:
            # if it's the first event, there is no previous event.
            if previous_event is None:
                previous_event = current_event
            # Refer to the last event to calculate the new backfill time
            current_event["timediff"] = (
                current_event["base_time"] -
                previous_event["base_time"]) * sample.timeMultiple
            previous_event = current_event
        return line_list
Beispiel #30
0
    def flush(self, q):
        """Send queued events to Splunk's ``/services/receivers/simple`` endpoint.

        Rows in ``q`` are grouped by their source/sourcetype pair. For each
        group one HTTP(S) connection is opened to
        ``self._splunkHost:self._splunkPort`` and the raw text of the group's
        events is sent in a single POST authorized with
        ``self._sample.sessionKey``.

        Side effect on the input: a row whose ``source`` or ``sourcetype`` is
        None has that value replaced by an empty string.

        :param q: sized iterable of event dicts; each row is read for
            ``_raw``, ``source`` and ``sourcetype``, and optionally for
            ``index``, ``host`` and ``hostRegex``.
        :raises IOError: when building or sending the request raises
            ``http.client.HTTPException``.
        """
        if len(q) > 0:
            # Store each source/sourcetype combo with its events so we can send them all together
            queues = {}
            for row in q:
                # Normalize missing values so the grouping key can be built by concatenation.
                if row["source"] is None:
                    row["source"] = ""
                if row["sourcetype"] is None:
                    row["sourcetype"] = ""
                if not row["source"] + "_" + row["sourcetype"] in queues:
                    queues[row["source"] + "_" + row["sourcetype"]] = deque([])
                queues[row["source"] + "_" + row["sourcetype"]].append(row)

            # Iterate sub-queues, each holds events for a specific source/sourcetype combo
            # (the key `k` itself is not used inside the loop)
            for k, queue in list(queues.items()):
                if len(queue) > 0:
                    streamout = ""
                    index = source = sourcetype = host = hostRegex = None
                    # The first row of the group supplies the metadata for the whole POST.
                    metamsg = queue.popleft()
                    # We need the raw string for each event, but other data will stay the same within its own sub-queue
                    msg = metamsg["_raw"]
                    # Metadata keys are optional; the first missing key leaves it and the
                    # ones read after it as None.
                    try:
                        index = metamsg["index"]
                        source = metamsg["source"]
                        sourcetype = metamsg["sourcetype"]
                        host = metamsg["host"]
                        hostRegex = metamsg["hostRegex"]
                    except KeyError:
                        pass

                    # NOTE(review): the value logged as "queue" is self._sample.source,
                    # not this group's key.
                    logger.debug(
                        "Flushing output for sample '%s' in app '%s' for queue '%s'"
                        % (self._sample.name, self._app, self._sample.source)
                    )
                    try:
                        # Pick the connection class from the configured method.
                        if self._splunkMethod == "https":
                            connmethod = http.client.HTTPSConnection
                        else:
                            connmethod = http.client.HTTPConnection
                        splunkhttp = connmethod(self._splunkHost, self._splunkPort)
                        splunkhttp.connect()
                        # Only metadata with a truthy value is sent as a query parameter.
                        urlparams = []
                        if index:
                            urlparams.append(("index", index))
                        if source:
                            urlparams.append(("source", source))
                        if sourcetype:
                            urlparams.append(("sourcetype", sourcetype))
                        if hostRegex:
                            urlparams.append(("host_regex", hostRegex))
                        if host:
                            urlparams.append(("host", host))
                        url = "/services/receivers/simple?%s" % (
                            urllib.parse.urlencode(urlparams)
                        )
                        headers = {
                            "Authorization": "Splunk %s" % self._sample.sessionKey
                        }

                        # Iterate each raw event string in its sub-queue
                        # NOTE(review): the loop ends on the first falsy `_raw` (for
                        # example an empty string), so rows still in the sub-queue
                        # after such an event are not sent - confirm this is intended.
                        while msg:
                            if msg[-1] != "\n":
                                msg += "\n"
                            streamout += msg
                            try:
                                msg = queue.popleft()["_raw"]
                            except IndexError:
                                # Sub-queue exhausted; False terminates the loop.
                                msg = False

                        splunkhttp.request("POST", url, streamout, headers)
                        # NOTE(review): this debug line writes the session key to the log.
                        logger.debug(
                            "POSTing to url %s on %s://%s:%s with sessionKey %s"
                            % (
                                url,
                                self._splunkMethod,
                                self._splunkHost,
                                self._splunkPort,
                                self._sample.sessionKey,
                            )
                        )

                    except http.client.HTTPException as e:
                        logger.error(
                            'Error connecting to Splunk for logging for sample %s.  Exception "%s" Config: %s'
                            % (self._sample.name, e.args, self)
                        )
                        # NOTE(review): the connection is not closed on this path, and the
                        # message formats self._sample rather than self._sample.name.
                        raise IOError(
                            "Error connecting to Splunk for logging for sample %s"
                            % self._sample
                        )

                    # Read the response; a non-200 status or a bad status line is only
                    # logged, it does not raise.
                    try:
                        response = splunkhttp.getresponse()
                        data = response.read()
                        if response.status != 200:
                            logger.error(
                                "Data not written to Splunk.  Splunk returned %s" % data
                            )
                    except http.client.BadStatusLine:
                        logger.error(
                            "Received bad status from Splunk for sample '%s'"
                            % self._sample
                        )
                    logger.debug("Closing splunkhttp connection")
                    if splunkhttp:
                        splunkhttp.close()