Example 1
    def __init__(self, sample, output_counter=None):

        # Override maxQueueLength to awsS3EventPerKey so that each flush
        # generates one AWS S3 object key
        if sample.awsS3EventPerKey:
            sample.maxQueueLength = sample.awsS3EventPerKey

        OutputPlugin.__init__(self, sample, output_counter)

        if not boto_imported:
            logger.error("There is no boto3 or botocore library available")
            return

        # disable any "requests" warnings
        requests.packages.urllib3.disable_warnings()

        # Bind passed in samples to the outputter.
        self.awsS3compressiontype = getattr(sample, "awsS3CompressionType", None) or None
        self.awsS3eventtype = getattr(sample, "awsS3EventType", None) or "syslog"
        self.awsS3objectprefix = getattr(sample, "awsS3ObjectPrefix", None) or ""
        self.awsS3objectsuffix = getattr(sample, "awsS3ObjectSuffix", None) or ""
        self.awsS3bucketname = sample.awsS3BucketName
        logger.debug("Setting up the connection pool for %s in %s" %
                     (self._sample.name, self._app))
        self._client = None
        self._createConnections(sample)
        logger.debug("Finished init of awsS3 plugin.")
Example 2
    def flush(self, q):
        if len(q) > 0:
            logger.debug(
                "Flushing output for sample '%s' in app '%s' for queue '%s'" %
                (self._sample.name, self._app, self._sample.source))

            # Loop through all the messages, writing each one and flushing the
            # file handle only once per batch. This may let the file exceed
            # fileMaxBytes slightly, but it greatly improves performance.
            try:
                for metamsg in q:
                    msg = metamsg.get("_raw")
                    if not msg:
                        continue
                    if msg[-1] != "\n":
                        msg += "\n"

                    if self._fileLength + len(msg) <= self._fileMaxBytes:
                        self._fileHandle.write(msg)
                        self._fileLength += len(msg)
                    else:
                        self._fileHandle.flush()
                        self._fileHandle.close()

                        if os.path.exists(self._file + "." +
                                          str(self._fileBackupFiles)):
                            logger.debug("File Output: Removing file: %s" %
                                         (self._file + "." +
                                          str(self._fileBackupFiles)))
                            os.unlink(self._file + "." +
                                      str(self._fileBackupFiles))

                        for x in range(1, int(self._fileBackupFiles))[::-1]:
                            logger.debug("File Output: Checking for file: %s" %
                                         self._file + "." + str(x))
                            if os.path.exists(self._file + "." + str(x)):
                                logger.debug(
                                    "File Output: Renaming file %s to %s" % (
                                        self._file + "." + str(x),
                                        self._file + "." + str(x + 1),
                                    ))
                                os.rename(
                                    self._file + "." + str(x),
                                    self._file + "." + str(x + 1),
                                )

                        os.rename(self._file, self._file + ".1")
                        self._fileHandle = open(self._file, "w")
                        self._fileHandle.write(msg)
                        self._fileLength = len(msg)
            except IndexError:
                logger.warning(
                    "IndexError when writting for app '%s' sample '%s'" %
                    (self._app, self._sample.name))

            if not self._fileHandle.closed:
                self._fileHandle.flush()
            logger.debug("Queue for app '%s' sample '%s' written" %
                         (self._app, self._sample.name))

            self._fileHandle.close()
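
The loop above implements the usual numbered-backup rotation scheme. A condensed, standalone sketch of the same logic, assuming a helper named rotate (illustrative, not part of the plugin):

import os

def rotate(path, backups):
    # Drop the oldest backup, shift the remaining backups up by one,
    # then move the live file to ".1" (mirrors the rotation above).
    oldest = "%s.%d" % (path, backups)
    if os.path.exists(oldest):
        os.unlink(oldest)
    for x in range(backups - 1, 0, -1):
        if os.path.exists("%s.%d" % (path, x)):
            os.rename("%s.%d" % (path, x), "%s.%d" % (path, x + 1))
    os.rename(path, path + ".1")

With backups=3 and path="events.log", the order is: events.log.3 is deleted, events.log.2 becomes .3, events.log.1 becomes .2, and events.log becomes .1 before a fresh file is opened.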
Example 3
 def build_events(self,
                  eventsDict,
                  startTime,
                  earliest,
                  latest,
                  ignore_tokens=False):
     """Ready events for output by replacing tokens and updating the output queue"""
     # Replace tokens first so that perDayVolume evaluates the correct event length
     send_objects = self.replace_tokens(eventsDict,
                                        earliest,
                                        latest,
                                        ignore_tokens=ignore_tokens)
     try:
         self._out.bulksend(send_objects)
         self._sample.timestamp = None
     except Exception as e:
         logger.exception("Exception {} happened.".format(type(e)))
         raise e
     try:
         # TODO: Change this logic so that we don't lose all events if an exception is hit (try/except/break?)
         endTime = datetime.datetime.now()
         timeDiff = endTime - startTime
         timeDiffFrac = "%d.%06d" % (timeDiff.seconds,
                                     timeDiff.microseconds)
         logger.debug("Interval complete, flushing feed")
         self._out.flush(endOfInterval=True)
         logger.debug(
             "Generation of sample '%s' in app '%s' completed in %s seconds."
             % (self._sample.name, self._sample.app, timeDiffFrac))
     except Exception as e:
         logger.exception("Exception {} happened.".format(type(e)))
         raise e
Example 4
 def __init__(self, time, sample=None, config=None, genqueue=None, outputqueue=None, loggingqueue=None):
     # Logger already setup by config, just get an instance
     # setup default options
     self.profiler = config.profiler
     self.config = config
     self.sample = sample
     self.end = getattr(self.sample, "end", -1)
     self.endts = getattr(self.sample, "endts", None)
     self.generatorQueue = genqueue
     self.outputQueue = outputqueue
     self.time = time
     self.stopping = False
     self.countdown = 0
     self.executions = 0
     self.interval = getattr(self.sample, "interval", config.interval)
      logger.debug('Initializing timer for %s' % (sample.name if sample is not None else "None"))
     # load plugins
     if self.sample is not None:
         rater_class = self.config.getPlugin('rater.' + self.sample.rater, self.sample)
         self.rater = rater_class(self.sample)
         self.generatorPlugin = self.config.getPlugin('generator.' + self.sample.generator, self.sample)
         self.outputPlugin = self.config.getPlugin('output.' + self.sample.outputMode, self.sample)
         if self.sample.timeMultiple < 0:
             logger.error("Invalid setting for timeMultiple: {}, value should be positive".format(
                 self.sample.timeMultiple))
         elif self.sample.timeMultiple != 1:
             self.interval = self.sample.interval
             logger.debug("Adjusting interval {} with timeMultiple {}, new interval: {}".format(
                 self.sample.interval, self.sample.timeMultiple, self.interval))
      if self.sample is not None:
          logger.info("Start '%s' generatorWorkers for sample '%s'" %
                      (self.sample.config.generatorWorkers, self.sample.name))
Example 5
 def __init__(self, sample):
     super(PerDayVolume, self).__init__(sample)
     # Logger already setup by config, just get an instance
     logger.debug("Starting PerDayVolumeRater for %s" %
                  sample.name if sample is not None else "None")
     self.previous_count_left = 0
     self.raweventsize = 0
Example 6
 def getTSFromEvent(self, event, passed_token=None):
     currentTime = None
     formats = []
     # JB: 2012/11/20 - Can we optimize this by only testing tokens of type = *timestamp?
     # JB: 2012/11/20 - Alternatively, documentation should suggest putting timestamp as token.0.
     if passed_token is not None:
         tokens = [passed_token]
     else:
         tokens = self.tokens
     for token in tokens:
         try:
             formats.append(token.token)
             # logger.debug("Searching for token '%s' in event '%s'" % (token.token, event))
             results = token._search(event)
             if results:
                 timeFormat = token.replacement
                 group = 0 if len(results.groups()) == 0 else 1
                 timeString = results.group(group)
                 # logger.debug("Testing '%s' as a time string against '%s'" % (timeString, timeFormat))
                 if timeFormat == "%s":
                     ts = float(timeString) if len(timeString) < 10 else float(timeString) \
                          / (10**(len(timeString) - 10))
                     # logger.debug("Getting time for timestamp '%s'" % ts)
                     currentTime = datetime.datetime.fromtimestamp(ts)
                 else:
                     # logger.debug("Getting time for timeFormat '%s' and timeString '%s'" %
                     #                   (timeFormat, timeString))
                     # Working around Python bug with a non thread-safe strptime. Randomly get AttributeError
                     # when calling strptime, so if we get that, try again
                     while currentTime is None:
                         try:
                             # Checking for timezone adjustment
                             if timeString[-5] == "+":
                                 timeString = timeString[:-5]
                             currentTime = datetime.datetime.strptime(timeString, timeFormat)
                         except AttributeError:
                             pass
                 logger.debug("Match '%s' Format '%s' result: '%s'" % (timeString, timeFormat, currentTime))
                  if isinstance(currentTime, datetime.datetime):
                     break
         except ValueError:
             logger.warning("Match found ('%s') but time parse failed. Timeformat '%s' Event '%s'" %
                                 (timeString, timeFormat, event))
      if not isinstance(currentTime, datetime.datetime):
         # Total fail
         if passed_token is None:  # If we're running for autotimestamp don't log error
             logger.warning(
                 "Can't find a timestamp (using patterns '%s') in this event: '%s'." % (formats, event))
         raise ValueError("Can't find a timestamp (using patterns '%s') in this event: '%s'." % (formats, event))
     # Check to make sure we parsed a year
     if currentTime.year == 1900:
         currentTime = currentTime.replace(year=self.now().year)
     # 11/3/14 CS So, this is breaking replay mode, and getTSFromEvent is only used by replay mode
     #            but I don't remember why I added these two lines of code so it might create a regression.
     #            Found the change on 6/14/14 but no comments as to why I added these two lines.
     # if self.timestamp == None:
     #     self.timestamp = currentTime
     return currentTime
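
A minimal standalone sketch of the "%s" branch above: epoch strings longer than ten digits are treated as carrying sub-second digits and are scaled back to seconds before conversion (epoch_to_datetime is an illustrative name, not part of this class):

import datetime

def epoch_to_datetime(time_string):
    # Epoch strings longer than 10 digits carry sub-second digits
    # (13 digits = milliseconds), so scale back to whole seconds.
    ts = float(time_string)
    if len(time_string) > 10:
        ts /= 10 ** (len(time_string) - 10)
    return datetime.datetime.fromtimestamp(ts)

print(epoch_to_datetime("1577836800"))     # seconds since the epoch
print(epoch_to_datetime("1577836800123"))  # milliseconds, same instant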
Example 7
    def __init__(self, sample):
        logger.debug(
            "Starting ConfigRater for %s"
            % (sample.name if sample is not None else "None")
        )

        self._sample = sample
        self._generatorWorkers = self._sample.config.generatorWorkers
Example 8
 def __init__(self, sample, output_counter=None):
     self._app = sample.app
     self._sample = sample
     self._outputMode = sample.outputMode
     self.events = None
     logger.debug("Starting OutputPlugin for sample '%s' with output '%s'" %
                  (self._sample.name, self._sample.outputMode))
     self._queue = deque([])
     self.output_counter = output_counter
Example 9
    def flush(self, events):
        if not self.scsEndPoint:
            if getattr(self.config, "scsEndPoint", None):
                self.scsEndPoint = self.config.scsEndPoint
            else:
                raise NoSCSEndPoint(
                    "Please specify your REST endpoint for the SCS tenant")

        if not self.scsAccessToken:
            if getattr(self.config, "scsAccessToken", None):
                self.scsAccessToken = self.config.scsAccessToken
            else:
                raise NoSCSAccessToken(
                    "Please specify your REST endpoint access token for the SCS tenant"
                )

        if self.scsClientId and self.scsClientSecret:
            logger.info(
                "Both scsClientId and scsClientSecret are supplied." +
                " We will renew the expired token using these credentials.")
            self.scsRenewToken = True
        else:
            if getattr(self.config, "scsClientId", None) and getattr(
                    self.config, "scsClientSecret", None):
                self.scsClientId = self.config.scsClientId
                self.scsClientSecret = self.config.scsClientSecret
                logger.info(
                    "Both scsClientId and scsClientSecret are supplied." +
                    " We will renew the expired token using these credentials."
                )
                self.scsRenewToken = True
            else:
                self.scsRenewToken = False

        self.header = {
            "Authorization": "Bearer {0}".format(self.scsAccessToken),
            "Content-Type": "application/json",
        }

        self.accessTokenExpired = False
        self.tokenRenewEndPoint = "https://auth.scp.splunk.com/token"
        self.tokenRenewBody = {
            "client_id": self.scsClientId,
            "client_secret": self.scsClientSecret,
            "grant_type": "client_credentials",
        }

        for i in range(self.scsRetryNum + 1):
            logger.debug("Sending data to the scs endpoint. Num:{0}".format(i))
            self._sendHTTPEvents(events)

            if not self.checkResults():
                if self.accessTokenExpired and self.scsRenewToken:
                    self.renewAccessToken()
                self.active_sessions = []
            else:
                break
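
The retry loop above sends, checks the results, and renews the token between attempts. A hedged sketch of that control flow with stand-in callables (send, check, and renew are placeholders, not this plugin's API):

def send_with_retry(send, check, renew, retries=3):
    # Attempt the initial send plus up to `retries` retries; renew the
    # access token between attempts whenever it is reported expired.
    for attempt in range(retries + 1):
        send()
        ok, token_expired = check()
        if ok:
            return True
        if token_expired:
            renew()
    return False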
Example 10
    def flush(self, endOfInterval=False):
        """
        Flush the output buffer. When called with endOfInterval=True, only flush
        if we have been called more times than the maxIntervalsBeforeFlush tunable.
        """
        # TODO: Fix interval flushing somehow with a queue, not sure I even want to support this feature anymore.
        '''if endOfInterval:
            logger.debugv("Sample calling flush, checking increment against maxIntervalsBeforeFlush")
            c.intervalsSinceFlush[self._sample.name].increment()
            if c.intervalsSinceFlush[self._sample.name].value() >= self._sample.maxIntervalsBeforeFlush:
                logger.debugv("Exceeded maxIntervalsBeforeFlush, flushing")
                flushing = True
                c.intervalsSinceFlush[self._sample.name].clear()
            else:
                logger.debugv("Not enough events to flush, passing flush routine.")
        else:
            logger.debugv("maxQueueLength exceeded, flushing")
            flushing = True'''

        # TODO: This is set this way just for the time being while I decide if we want this feature.
        flushing = True
        if flushing:
            q = self._queue
            logger.debug("Flushing queue for sample '%s' with size %d" %
                         (self._sample.name, len(q)))
            self._queue = []
            outputer = self.outputPlugin(self._sample, self.output_counter)
            outputer.updateConfig(self.config)
            outputer.set_events(q)
            # When an outputQueue is used, output must run single-threaded, so the
            # outputer is put back on the outputQueue for a single worker thread to
            # execute. When an outputQueue is not used, it can be run by multiple
            # processes or threads, so there is no need to queue the outputer; just
            # execute it.
            # If the outputPlugin requires useOutputQueue, use the outputQueue
            # regardless of the user's useOutputQueue config:
            if self.outputPlugin.useOutputQueue or self.config.useOutputQueue:
                try:
                    self.outputQueue.put(outputer)
                except Full:
                    logger.warning("Output Queue full, looping again")
            else:
                if self.config.splunkEmbedded:
                    tmp = [len(s['_raw']) for s in q]
                    if len(tmp) > 0:
                        metrics_logger.info({
                            'timestamp': datetime.datetime.strftime(
                                datetime.datetime.now(), '%Y-%m-%d %H:%M:%S'),
                            'sample': self._sample.name,
                            'events': len(tmp),
                            'bytes': sum(tmp),
                        })
                    tmp = None
                outputer.run()
Example 11
 def run(self, output_counter=None):
     if output_counter is not None and hasattr(self.config, 'outputCounter') and self.config.outputCounter:
         # Use output_counter to calculate throughput
         self._out.setOutputCounter(output_counter)
     self.gen(count=self.count, earliest=self.start_time, latest=self.end_time, samplename=self._sample.name)
      # TODO: Make this somehow handle an output queue and support intervals and a master queue
     # Just double check to see if there's something in queue to flush out at the end of run
     if len(self._out._queue) > 0:
         logger.debug("Queue is not empty, flush out at the end of each run")
         self._out.flush()
Example 12
 def update_throughput(self, timestamp):
     # B/s, count/s
     delta_time = timestamp - self.current_time
      self.throughput_volume = self.event_size_1_min / delta_time
      self.throughput_count = self.event_count_1_min / delta_time
     self.current_time = timestamp
     self.event_count_1_min = 0
     self.event_size_1_min = 0
     logger.debug("Current throughput is {} B/s, {} count/s".format(
         self.throughput_volume, self.throughput_count))
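
A worked instance of the arithmetic above, with illustrative numbers:

event_size_1_min = 1048576   # bytes accumulated since the last update
event_count_1_min = 200      # events accumulated since the last update
delta_time = 60.0            # seconds between updates

throughput_volume = event_size_1_min / delta_time  # ~17476.27 B/s
throughput_count = event_count_1_min / delta_time  # ~3.33 count/s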
Example 13
 def predict_event_size(self):
     try:
         self.sample.loadSample()
         logger.debug("File sample loaded successfully.")
      except TypeError:
          logger.debug("Error loading sample file for sample '%s'" % self.sample.name)
          return 0
     total_len = sum([len(e['_raw']) for e in self.sample.sampleDict])
     sample_count = len(self.sample.sampleDict)
     if sample_count == 0:
         return 0
     else:
         return total_len/sample_count
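
The prediction is simply the mean raw-event length. A self-contained sketch with a stand-in sampleDict:

sample_dict = [{"_raw": "GET /index.html 200\n"},
               {"_raw": "POST /login 302\n"}]
total_len = sum(len(e["_raw"]) for e in sample_dict)
predicted = total_len / len(sample_dict) if sample_dict else 0
print(predicted)  # 18.0 -> mean event size in characters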
Example 14
    def createConnections(self):
        self.serverPool = []
        if self.httpeventServers:
            for server in self.httpeventServers.get("servers"):
                if not server.get("address"):
                    logger.error(
                        "requested a connection to a httpevent server, but no address specified for sample %s"
                        % self._sample.name)
                    raise ValueError(
                        "requested a connection to a httpevent server, but no address specified for sample %s"
                        % self._sample.name)
                if not server.get("port"):
                    logger.error(
                        "requested a connection to a httpevent server, but no port specified for server %s"
                        % server)
                    raise ValueError(
                        "requested a connection to a httpevent server, but no port specified for server %s"
                        % server)
                if not server.get("key"):
                    logger.error(
                        "requested a connection to a httpevent server, but no key specified for server %s"
                        % server)
                    raise ValueError(
                        "requested a connection to a httpevent server, but no key specified for server %s"
                        % server)
                if not ((server.get("protocol") == "http") or
                        (server.get("protocol") == "https")):
                    logger.error(
                        "requested a connection to a httpevent server, but no protocol specified for server %s"
                        % server)
                    raise ValueError(
                        "requested a connection to a httpevent server, but no protocol specified for server %s"
                        % server)
                logger.debug(
                    "Validation Passed, Creating a requests object for server: %s"
                    % server.get("address"))

                setserver = {}
                setserver["url"] = "%s://%s:%s/services/collector" % (
                    server.get("protocol"),
                    server.get("address"),
                    server.get("port"),
                )
                setserver["header"] = "Splunk %s" % server.get("key")
                logger.debug("Adding server set to pool, server: %s" %
                             setserver)
                self.serverPool.append(setserver)
        else:
            raise NoServers(
                "outputMode %s but httpeventServers not specified for sample %s"
                % (self.name, self._sample.name))
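
For reference, a hedged example of the httpeventServers structure this validation expects (address and key are placeholders):

httpevent_servers = {
    "servers": [
        {
            "protocol": "https",
            "address": "hec1.example.com",
            "port": "8088",
            "key": "00000000-0000-0000-0000-000000000000",
        },
    ]
}
# Each validated entry becomes:
#   url    = "https://hec1.example.com:8088/services/collector"
#   header = "Splunk 00000000-0000-0000-0000-000000000000"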
Example 15
 def checkResults(self):
     for session in self.active_sessions:
         response = session.result()
         if response.status_code == 401 and "Invalid or Expired Bearer Token" in response.text:
             logger.error("scsAccessToken is invalid or expired")
             self.accessTokenExpired = True
             return False
         elif response.status_code != 200:
              logger.error(
                  f"Data transmission failed with {response.status_code} and {response.text}"
              )
             return False
     logger.debug(f"Data transmission successful")
     return True
Example 16
 def multi_queue_it(self, count):
     logger.info("Entering multi-processing division of sample")
     numberOfWorkers = self.config.generatorWorkers
     logger.debug("Number of Workers: {0}".format(numberOfWorkers))
     # this is a redundant check, but will prevent some missed call to multi_queue without a valid setting
     if bool(self.sample.splitSample):
         # if split = 1, then they want to divide by number of generator workers, else use the splitSample
         if self.sample.splitSample == 1:
             logger.debug("SplitSample = 1, using all availible workers")
             targetWorkersToUse = numberOfWorkers
         else:
             logger.debug("SplitSample != 1, using {0} workers.".format(
                 self.sample.splitSample))
             targetWorkersToUse = self.sample.splitSample
      else:
          logger.debug(
              "SplitSample set to disable multithreading for just this sample."
          )
          self.single_queue_it()
          return
      currentWorkerPrepCount = 0
      remainingCount = count
      targetLoopCount = int(count) // targetWorkersToUse
      while currentWorkerPrepCount < targetWorkersToUse:
          currentWorkerPrepCount = currentWorkerPrepCount + 1
          # check if this is the last loop; if so, queue the remainder count
          if currentWorkerPrepCount < targetWorkersToUse:
              remainingCount = remainingCount - targetLoopCount
          else:
              targetLoopCount = remainingCount
          self.single_queue_it(targetLoopCount)
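
With the remainder logic fixed, the division hands each worker an equal share and gives the last worker whatever is left. A compact sketch of the same split (split_counts is illustrative):

def split_counts(count, workers):
    # Each worker gets count // workers events; the last worker also
    # absorbs the remainder so the total is preserved.
    base = count // workers
    return [base] * (workers - 1) + [count - base * (workers - 1)]

print(split_counts(10, 3))  # [3, 3, 4]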
Example 17
    def createConnections(self):
        self.serverPool = []
        if self.httpeventServers:
            for server in self.httpeventServers.get('servers'):
                if not server.get('address'):
                    logger.error(
                        'requested a connection to a httpevent server, but no address specified for sample %s'
                        % self._sample.name)
                    raise ValueError(
                        'requested a connection to a httpevent server, but no address specified for sample %s'
                        % self._sample.name)
                if not server.get('port'):
                    logger.error(
                        'requested a connection to a httpevent server, but no port specified for server %s'
                        % server)
                    raise ValueError(
                        'requested a connection to a httpevent server, but no port specified for server %s'
                        % server)
                if not server.get('key'):
                    logger.error(
                        'requested a connection to a httpevent server, but no key specified for server %s'
                        % server)
                    raise ValueError(
                        'requested a connection to a httpevent server, but no key specified for server %s'
                        % server)
                if server.get('protocol') not in ('http', 'https'):
                    logger.error(
                        'requested a connection to a httpevent server, but no valid protocol specified for server %s'
                        % server)
                    raise ValueError(
                        'requested a connection to a httpevent server, but no valid protocol specified for server %s'
                        % server)
                logger.debug(
                    "Validation Passed, Creating a requests object for server: %s"
                    % server.get('address'))

                setserver = {}
                setserver['url'] = "%s://%s:%s/services/collector" % (
                    server.get('protocol'), server.get('address'),
                    server.get('port'))
                setserver['header'] = "Splunk %s" % server.get('key')
                logger.debug("Adding server set to pool, server: %s" %
                             setserver)
                self.serverPool.append(setserver)
        else:
            raise NoServers(
                'outputMode %s but httpeventServers not specified for sample %s'
                % (self.name, self._sample.name))
Example 18
    def setOutputMetadata(self, event):
        if self._sample.sampletype == 'csv' and (event['index'] != self._sample.index
                                                 or event['host'] != self._sample.host
                                                 or event['source'] != self._sample.source
                                                 or event['sourcetype'] != self._sample.sourcetype):
            self._sample.index = event['index']
            self._sample.host = event['host']
            # Allow randomizing the host:
            if self._sample.hostToken:
                self.host = self._sample.hostToken.replace(self.host)

            self._sample.source = event['source']
            self._sample.sourcetype = event['sourcetype']
            logger.debug("Setting CSV parameters. index: '%s' host: '%s' source: '%s' sourcetype: '%s'" %
                              (self._sample.index, self._sample.host, self._sample.source, self._sample.sourcetype))
Example 19
 def flush(self, q):
     if len(q) > 0:
         logger.debug(
             "Flushing output for sample '%s' in app '%s' for queue '%s'" %
             (self._sample.name, self._app, self._sample.source))
         # Keep trying to open destination file as it might be touched by other processes
         data = ''.join(event['_raw'] for event in q if event.get('_raw'))
         while True:
             try:
                 with open(self.spoolPath, 'a') as dst:
                     dst.write(data)
                 break
             except Exception as e:
                 logger.error(str(e))
                 time.sleep(0.1)
         logger.debug("Queue for app '%s' sample '%s' written" %
                      (self._app, self._sample.name))
Example 20
 def rate(self):
     self.sample.count = int(self.sample.count)
     # Let generators handle infinite count for themselves
     if self.sample.count == -1 and self.sample.generator == "default":
         if not self.sample.sampleDict:
             logger.error(
                 "No sample found for default generator, cannot generate events"
             )
         self.sample.count = len(self.sample.sampleDict)
     count = self.sample.count
     rateFactor = self.adjust_rate_factor()
     ret = int(round(count * rateFactor, 0))
     if rateFactor != 1.0:
         logger.debug(
             "Original count: %s Rated count: %s Rate factor: %s"
             % (count, ret, rateFactor)
         )
     return ret
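
A worked instance of the rating math, assuming an illustrative rate factor of 0.25:

count = 100
rate_factor = 0.25  # e.g. product of hour-of-day and day-of-week multipliers
rated = int(round(count * rate_factor, 0))
print(rated)  # 25 events generated this interval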
Example 21
 def flush(self, endOfInterval=False):
     """
      Flush the output buffer. When called with endOfInterval=True, only flush
      if we have been called more times than the maxIntervalsBeforeFlush tunable.
     """
     flushing = True
     if flushing:
         q = self._queue
         logger.debug("Flushing queue for sample '%s' with size %d" %
                      (self._sample.name, len(q)))
         self._queue = []
         outputer = self.outputPlugin(self._sample, self.output_counter)
         outputer.updateConfig(self.config)
         outputer.set_events(q)
          # When an outputQueue is used, output must run single-threaded, so the
          # outputer is put back on the outputQueue for a single worker thread to
          # execute. When an outputQueue is not used, it can be run by multiple
          # processes or threads, so there is no need to queue the outputer; just
          # execute it.
          # If the outputPlugin requires useOutputQueue, use the outputQueue
          # regardless of the user's useOutputQueue config:
         if self.outputPlugin.useOutputQueue or self.config.useOutputQueue:
             try:
                 self.outputQueue.put(outputer)
             except Full:
                 logger.warning("Output Queue full, looping again")
         else:
             if self.config.splunkEmbedded:
                 tmp = [len(s['_raw']) for s in q]
                 if len(tmp) > 0:
                      metrics_logger.info({
                          'timestamp': datetime.datetime.strftime(
                              datetime.datetime.now(), '%Y-%m-%d %H:%M:%S'),
                          'sample': self._sample.name,
                          'events': len(tmp),
                          'bytes': sum(tmp),
                      })
                 tmp = None
             outputer.run()
         q = None
Example 22
    def __init__(self, sample, output_counter=None):
        OutputPlugin.__init__(self, sample, output_counter)

        if sample.fileName is None:
            logger.error(
                "outputMode file but file not specified for sample %s" %
                self._sample.name)
            raise ValueError(
                "outputMode file but file not specified for sample %s" %
                self._sample.name)

        self._file = sample.pathParser(sample.fileName)
        self._fileMaxBytes = sample.fileMaxBytes
        self._fileBackupFiles = sample.fileBackupFiles

        self._fileHandle = open(self._file, "a")
        self._fileLength = os.stat(self._file).st_size
        logger.debug(
            "Configured to log to '%s' with maxBytes '%s' with backupCount '%s'"
            % (self._file, self._fileMaxBytes, self._fileBackupFiles))
Example 23
    def processSampleLine(self, filehandler):
        """
        Due to a change in Python 3, utf-8 is now the default encoding when reading
        a file. To work around this, the processing loop lives outside the file handler.
        :param filehandler:
        :return:
        """
        sampleLines = []
        if self.breaker == self.config.breaker:
            logger.debug("Reading raw sample '%s' in app '%s'" %
                         (self.name, self.app))
            sampleLines = filehandler.readlines()
        # 1/5/14 CS Moving to using only sampleDict and doing the breaking up into events at load time
        # instead of on every generation
        else:
            logger.debug(
                "Non-default breaker '%s' detected for sample '%s' in app '%s'"
                % (self.breaker, self.name, self.app))
            sampleData = filehandler.read()
            logger.debug(
                "Filling array for sample '%s' in app '%s'; sampleData=%s, breaker=%s"
                % (self.name, self.app, len(sampleData), self.breaker))
            try:
                breakerRE = re.compile(self.breaker, re.M)
            except re.error:
                logger.error(
                    "Line breaker '%s' for sample '%s' in app '%s'"
                    " could not be compiled; using default breaker",
                    self.breaker,
                    self.name,
                    self.app,
                )
                # Fall back to the default breaker so breakerRE is always defined
                self.breaker = self.config.breaker
                breakerRE = re.compile(self.breaker, re.M)

            # Loop through data, finding matches of the regular expression and breaking them up into
            # "lines".  Each match includes the breaker itself.
            extractpos = 0
            searchpos = 0
            breakerMatch = breakerRE.search(sampleData, searchpos)
            while breakerMatch:
                logger.debug("Breaker found at: %d, %d" %
                             (breakerMatch.span()[0], breakerMatch.span()[1]))
                # Ignore matches at the beginning of the file
                if breakerMatch.span()[0] != 0:
                    sampleLines.append(
                        sampleData[extractpos:breakerMatch.span()[0]])
                    extractpos = breakerMatch.span()[0]
                searchpos = breakerMatch.span()[1]
                breakerMatch = breakerRE.search(sampleData, searchpos)
            sampleLines.append(sampleData[extractpos:])
        return sampleLines
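
A self-contained sketch of the breaker loop above, using re.finditer (equivalent to the repeated search calls) over a toy sample; the breaker pattern is illustrative:

import re

sample_data = "Jan 1 host app: msg one\nJan 2 host app: msg two\n"
breaker_re = re.compile(r"Jan \d+ ", re.M)

# Each event starts at a breaker match and runs to the next match;
# a match at position 0 is skipped, and the tail is appended at the end.
lines, extract_pos = [], 0
for m in breaker_re.finditer(sample_data):
    if m.start() != 0:
        lines.append(sample_data[extract_pos:m.start()])
        extract_pos = m.start()
lines.append(sample_data[extract_pos:])
print(lines)  # ['Jan 1 host app: msg one\n', 'Jan 2 host app: msg two\n']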
Example 24
 def earliestTime(self):
     # First optimization, we need only store earliest and latest
     # as an offset of now if they're relative times
     if self._earliestParsed is not None:
         earliestTime = self.now() - self._earliestParsed
         logger.debug("Using cached earliest time: %s" % earliestTime)
     else:
         if (self.earliest.strip()[0:1] == "+"
                 or self.earliest.strip()[0:1] == "-"
                 or self.earliest == "now"):
             tempearliest = timeParser(self.earliest,
                                       timezone=self.timezone)
             temptd = self.now(realnow=True) - tempearliest
             self._earliestParsed = datetime.timedelta(
                 days=temptd.days, seconds=temptd.seconds)
             earliestTime = self.now() - self._earliestParsed
             logger.debug(
                 "Calulating earliestParsed as '%s' with earliestTime as '%s' and self.sample.earliest as '%s'"
                 % (self._earliestParsed, earliestTime, tempearliest))
         else:
             earliestTime = timeParser(self.earliest,
                                       timezone=self.timezone)
             logger.debug("earliestTime as absolute time '%s'" %
                          earliestTime)
     return earliestTime
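
The caching above stores a relative spec like "-15m" as a timedelta so later calls just subtract it from "now" instead of re-parsing. A minimal sketch with a toy parser standing in for timeParser:

import datetime

_cached_offset = None

def earliest_time(now, earliest="-15m"):
    # Parse the relative spec once, cache the offset, and afterwards
    # just subtract the cached timedelta from "now".
    global _cached_offset
    if _cached_offset is None:
        minutes = int(earliest.rstrip("m"))  # toy parser for "-15m"
        _cached_offset = datetime.timedelta(minutes=-minutes)
    return now - _cached_offset

print(earliest_time(datetime.datetime.now()))  # 15 minutes before now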
Example 25
 def _transmitEvents(self, payloadstring):
     targetServer = []
     logger.debug("Transmission called with payloadstring: %s " %
                  payloadstring)
     if self.httpeventoutputmode == "mirror":
         targetServer = self.serverPool
     else:
         targetServer.append(random.choice(self.serverPool))
     for server in targetServer:
         logger.debug("Selected targetServer object: %s" % targetServer)
         url = server["url"]
         headers = {}
         headers["Authorization"] = server["header"]
         headers["content-type"] = "application/json"
         try:
             payloadsize = len(payloadstring)
             self.active_sessions.append(
                 self.session.post(url=url,
                                   data=payloadstring,
                                   headers=headers,
                                   verify=False))
         except Exception as e:
             logger.error("Failed for exception: %s" % e)
             logger.error(
                 "Failed sending events to url: %s  sourcetype: %s  size: %s"
                 % (url, self.lastsourcetype, payloadsize))
             logger.debug(
                 "Failed sending events to url: %s  headers: %s payload: %s"
                 % (url, headers, payloadstring))
             raise e
Example 26
    def setOutputMetadata(self, event):
        if self._sample.sampletype == "csv" and (
                event["index"] != self._sample.index
                or event["host"] != self._sample.host
                or event["source"] != self._sample.source
                or event["sourcetype"] != self._sample.sourcetype):
            self._sample.index = event["index"]
            self._sample.host = event["host"]
            # Allow randomizing the host:
            if self._sample.hostToken:
                self.host = self._sample.hostToken.replace(self.host)

            self._sample.source = event["source"]
            self._sample.sourcetype = event["sourcetype"]
            logger.debug(
                "Setting CSV parameters. index: '%s' host: '%s' source: '%s' sourcetype: '%s'"
                % (
                    self._sample.index,
                    self._sample.host,
                    self._sample.source,
                    self._sample.sourcetype,
                ))
Example 27
 def send_events(self, send_objects, startTime):
     """Ready events for output by replacing tokens and updating the output queue"""
     try:
         self._out.bulksend(send_objects)
         self._sample.timestamp = None
     except Exception as e:
         logger.exception("Exception {} happened.".format(type(e)))
         raise e
     try:
         # TODO: Change this logic so that we don't lose all events if an exception is hit (try/except/break?)
         endTime = datetime.datetime.now()
         timeDiff = endTime - startTime
         timeDiffFrac = "%d.%06d" % (timeDiff.seconds,
                                     timeDiff.microseconds)
         logger.debug("Interval complete, flushing feed")
         self._out.flush(endOfInterval=True)
         logger.debug(
             "Generation of sample '%s' in app '%s' completed in %s seconds."
             % (self._sample.name, self._sample.app, timeDiffFrac))
     except Exception as e:
         logger.exception("Exception {} happened.".format(type(e)))
         raise e
Example 28
    def _createConnections(self, sample):
        try:
            if hasattr(sample, "awsKeyId") and hasattr(sample, "awsSecretKey"):
                self._client = boto3.client(
                    "s3",
                    region_name=sample.awsRegion,
                    aws_access_key_id=sample.awsKeyId,
                    aws_secret_access_key=sample.awsSecretKey,
                )
                if self._client is None:
                    msg = """
                    [your_eventgen_stanza]
                    awsKeyId = YOUR_ACCESS_KEY
                    awsSecretKey = YOUR_SECRET_KEY
                    """

                    logger.error(
                        "Failed to init boto3 client; you should define a correct 'awsKeyId' "
                        "and 'awsSecretKey' in the eventgen conf: %s" % msg)
                    raise Exception(msg)
            else:
                self._client = boto3.client("s3", region_name=sample.awsRegion)
        except Exception as e:
            logger.error("Failed for init boto3 client: exception =  %s" % e)
            raise e
        # Try list bucket method to validate if the connection works
        try:
            self._client.list_buckets()
        except botocore.exceptions.NoCredentialsError:
            msg = """
            [default]
            aws_access_key_id = YOUR_ACCESS_KEY
            aws_secret_access_key = YOUR_SECRET_KEY
            """

            logger.error("Failed for init boto3 client, you should create "
                         "'~/.aws/credentials' with credential info %s" % msg)
            raise
        logger.debug("Init conn done, conn = %s" % self._client)
Example 29
 def rate(self):
     perdayvolume = float(self.sample.perDayVolume)
     # Convert perdayvolume to bytes from GB
     perdayvolume = perdayvolume * 1024 * 1024 * 1024
     interval = self.sample.interval
     if self.sample.interval == 0:
         logger.debug("Running perDayVolume as if for 24hr period.")
         interval = 86400
     logger.debug("Current perDayVolume: %f,  Sample interval: %s" %
                  (perdayvolume, interval))
     intervalsperday = 86400 / interval
     perintervalvolume = perdayvolume / intervalsperday
     count = self.sample.count
     rateFactor = self.adjust_rate_factor()
     logger.debug("Size per interval: %s, rate factor to adjust by: %s" %
                  (perintervalvolume, rateFactor))
     ret = int(round(perintervalvolume * rateFactor, 0))
     if rateFactor != 1.0:
         logger.debug("Original count: %s Rated count: %s Rate factor: %s" %
                      (count, ret, rateFactor))
     logger.debug(
         "Finished rating, interval: {0}s, generation rate: {1} MB/interval"
         .format(interval, round((ret / 1024 / 1024), 4)))
     return ret
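
A worked instance of the volume math above, for 1 GB/day on an hourly interval:

per_day_volume = 1.0 * 1024 * 1024 * 1024  # 1 GB/day in bytes
interval = 3600                            # seconds (0 would mean a 24h period)
intervals_per_day = 86400 / interval       # 24.0
per_interval = per_day_volume / intervals_per_day
print(round(per_interval / 1024 / 1024, 4), "MB/interval")  # 42.6667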
Example 30
 def updateConfig(self, config):
     OutputPlugin.updateConfig(self, config)
     try:
          if not hasattr(self.config, 'httpeventServers'):
             if hasattr(self._sample, 'httpeventServers'):
                 self.config.httpeventServers = self._sample.httpeventServers
             else:
                 logger.error(
                     'outputMode %s but httpeventServers not specified for sample %s'
                     % (self.name, self._sample.name))
                 raise NoServers(
                     'outputMode %s but httpeventServers not specified for sample %s'
                     % (self.name, self._sample.name))
          # set default output mode to round robin
          if getattr(self.config, 'httpeventOutputMode', None):
              self.httpeventoutputmode = config.httpeventOutputMode
          elif getattr(self._sample, 'httpeventOutputMode', None):
              self.httpeventoutputmode = self._sample.httpeventOutputMode
          else:
              self.httpeventoutputmode = 'roundrobin'
          if getattr(self.config, 'httpeventMaxPayloadSize', None):
              self.httpeventmaxsize = self.config.httpeventMaxPayloadSize
          elif getattr(self._sample, 'httpeventMaxPayloadSize', None):
              self.httpeventmaxsize = self._sample.httpeventMaxPayloadSize
          else:
              self.httpeventmaxsize = 10000
         logger.debug("Currentmax size: %s " % self.httpeventmaxsize)
         if isinstance(config.httpeventServers, str):
             self.httpeventServers = json.loads(config.httpeventServers)
         else:
             self.httpeventServers = config.httpeventServers
         logger.debug("Setting up the connection pool for %s in %s" %
                      (self._sample.name, self._app))
         self.createConnections()
         logger.debug("Pool created.")
         logger.debug("Finished init of %s plugin." % self.name)
     except Exception as e:
         logger.exception(str(e))
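
The config-then-sample-then-default precedence used above can be factored into a single helper. A hedged sketch (resolve is illustrative, not part of the plugin):

def resolve(config, sample, attr, default=None):
    # A truthy value on the global config wins, then a truthy value on
    # the sample, then the default (mirrors httpeventOutputMode above).
    for source in (config, sample):
        value = getattr(source, attr, None)
        if value:
            return value
    return default

# e.g. resolve(self.config, self._sample, 'httpeventOutputMode', 'roundrobin')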