def stream_events(self, inputs, ew):
    for input_name, input_item in inputs.inputs.iteritems():
        # Get fields from the InputDefinition object
        username = input_item["jss_username"]
        password = input_item["jss_password"]
        url = input_item["jss_url"]
        api_call = input_item["api_call"]
        search_name = input_item["search_name"]
        # Host and index should always be included in the stanza by splunk
        index = input_item["index"]
        host = input_item["host"]

        if api_call == "computer":
            jss_url = "%s/JSSResource/advancedcomputersearches/name/%s" % (
                url, search_name)
        elif api_call == "mobile_device":
            jss_url = "%s/JSSResource/advancedmobiledevicesearches/name/%s" % (
                url, search_name)
        else:
            splunk.EventWriter.log(ew, splunk.EventWriter.ERROR,
                                   "api_call: %s not specified correctly" % api_call)
            return

        # Log that we are beginning to retrieve data.
        splunk.EventWriter.log(ew, splunk.EventWriter.INFO,
                               "Started retrieving data for user %s" % username)

        response = requests.get(jss_url, auth=(username, password),
                                headers={'accept': 'application/json'})
        response.raise_for_status()
        jsondata = response.json()

        if api_call == "computer":
            computers = jsondata["advanced_computer_search"]["computers"]
            for computer in computers:
                event = splunk.Event(
                    data=json.dumps(computer),
                    stanza=computer["Computer_Name"],
                    index=index,
                    host=host
                )
                # Tell the EventWriter to write this event
                ew.write_event(event)
        elif api_call == "mobile_device":
            mobile_devices = \
                jsondata["advanced_mobile_device_search"]["mobile_devices"]
            for mobile_device in mobile_devices:
                event = splunk.Event(
                    data=json.dumps(mobile_device),
                    stanza=mobile_device["name"],
                    index=index,
                    host=host
                )
                ew.write_event(event)

def stream_events(self, inputs, event_writer):
    """Writes event objects to event_writer."""
    try:
        for input_name, input_item in inputs.inputs.items():
            log.source = input_name
            log.setLevel(input_item['log_level'])
            if input_item['index'] == 'default':
                self.index = 'main'
            else:
                self.index = input_item['index']
            self.source_type = input_item['sourcetype']
            self.session_key = self._input_definition.metadata[
                'session_key']
            self.mgmt_endpoint = urlparse(
                self._input_definition.metadata['server_uri'])
            log.info('Initializing ' + input_name)
            event_count = 0
            self.source = input_name
            for item in self.monobank(input_item['init_date'],
                                      input_item['card_id'],
                                      input_item['token']):
                splunk_event = modularinput.Event(
                    data=item,
                    index=input_item['index'],
                    sourcetype=input_item['sourcetype'])
                event_writer.write_event(splunk_event)
                event_count += 1
            log.info('Total events ingested: ' + str(event_count))
    except Exception as exception:
        log.exception(exception)
        raise

def write_events(self, file_name, outbound_dir, events, index='main',
                 source_type=None):
    """
    write results to proper destination based on where script runs
    :param file_name:
    :param outbound_dir:
    :param events:
    """
    file_type = file_name.split('.')[-1]
    file_name = file_name.replace(file_type, 'json')
    outbound_file_path = os.path.join(outbound_dir, file_name)
    count = 0
    with open(outbound_file_path, 'w') as json_file:
        for event in events:
            for item in json.loads(event):
                if self.ew is not None:
                    # write event to Splunk
                    splunk_event = modularinput.Event(
                        data=json.dumps(item),
                        index=index,
                        sourcetype=source_type)
                    self.ew.write_event(splunk_event)
                    count += 1
                else:
                    # print event to STDOUT
                    print(item)
            json_file.write(event)
    json_file.close()
    return file_name, count

def write_event(asset_data, site_name, ew, index, logger):
    """
    This function writes events to Splunk
    :param asset_data: list of assets
    :param site_name: Site name associated with the assets
    :param ew: Event Writer object
    :param index: Index on which data will be written
    :param logger: Logger object
    """
    sourcetype = 'lansweeper:asset:v2'
    try:
        logger.info(
            'Writing assets data to Splunk for site={} asset_count={}'.format(
                site_name, len(asset_data)))
        for asset in asset_data:
            asset['site_name'] = site_name
            event = smi.Event(data=json.dumps(asset),
                              sourcetype=sourcetype,
                              index=index)
            ew.write_event(event)
        logger.info('Successfully indexed the asset data')
    except Exception as exception:
        logger.error(
            "Error writing event to Splunk, error={}".format(exception))

def new_event(self, data, time=None, host=None, index=None, source=None,
              sourcetype=None, done=True, unbroken=True):
    '''
    :param data: ``string``, the event's text.
    :param time: ``float``, time in seconds, including up to 3 decimal places to represent milliseconds.
    :param host: ``string``, the event's host, ex: localhost.
    :param index: ``string``, the index this event is specified to write to, or None if default index.
    :param source: ``string``, the source of this event, or None to have Splunk guess.
    :param sourcetype: ``string``, source type currently set on this event, or None to have Splunk guess.
    :param done: ``boolean``, is this a complete ``Event``? False if an ``Event`` fragment.
    :param unbroken: ``boolean``, Is this event completely encapsulated in this ``Event`` object?
    '''
    return smi.Event(data=data, time=time, host=host, index=index,
                     source=source, sourcetype=sourcetype, done=done,
                     unbroken=unbroken)

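# Illustrative sketch only (not part of the original source): one way a
# stream_events-style method could use the new_event() wrapper above to hand
# events to the modular input EventWriter. The method name, the payload, and
# the stanza keys 'index' and 'sourcetype' are assumptions for illustration.
def _example_stream_events_sketch(self, inputs, ew):
    for input_name, input_item in inputs.inputs.items():
        # wrap a small JSON payload per input stanza and emit it
        event = self.new_event(data=json.dumps({'input': input_name}),
                               source=input_name,
                               index=input_item.get('index'),
                               sourcetype=input_item.get('sourcetype'))
        ew.write_event(event)
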
def stream_events(self, inputs, ew):
    """overloaded splunklib modularinput method"""
    # use simple rest call to load the events
    self.header = {}
    self.data = {}
    self.url = 'https://www.statuscake.com/API/Tests/'
    self.rest_method = 'GET'
    self.header["TestID"] = "1105966"
    self.header["Username:"] = "Camsupport247"
    self.header["API:"] = "TwjxBcNOudeeFu49BOLe"
    self.input_name, self.input_items = inputs.inputs.popitem()
    self.output_index = self.input_items['index']
    self.output_sourcetype = self.input_items['sourcetype']
    try:
        http_cli = httplib2.Http(timeout=10,
                                 disable_ssl_certificate_validation=True)
        resp, content = http_cli.request(self.url,
                                         method=self.rest_method,
                                         body=urllib.urlencode(self.data),
                                         headers=self.header)
        event = smi.Event(source=self.input_name,
                          index=self.output_index,
                          sourcetype=self.output_sourcetype,
                          data=content.decode('utf-8', errors='ignore'))
        ew.write_event(event)
    except Exception as e:
        raise e

def stream_events(self, config, ew):
    # Splunk Enterprise calls the modular input,
    # streams XML describing the inputs to stdin,
    # and waits for XML on stdout describing events.
    with CheckpointDB(config.metadata["checkpoint_dir"]) as cpoint:
        for input_name, input_item in config.inputs.iteritems():
            default_date = input_item["default_checkpoint_date"]
            url = input_item["url"]
            date_field = input_item["date_field"]
            limit = input_item["limit"]
            checkpoint_date = cpoint.get_checkpoint(input_name, default_date)
            new_checkpoint = checkpoint_date

            ew.log(modularinput.EventWriter.INFO,
                   "Making request to Socrata for input {} since {}".format(
                       input_name, checkpoint_date))

            for data in self.fetch_data(url, date_field, checkpoint_date,
                                        limit, ew):
                datestring = data[date_field]
                dtime = datetime.datetime.strptime(datestring,
                                                   "%Y-%m-%dT%H:%M:%S.%f")
                e = modularinput.Event()
                e.stanza = input_name
                e.time = time.mktime(dtime.timetuple())
                e.data = json.dumps(data)
                ew.write_event(e)
                if new_checkpoint < datestring:
                    new_checkpoint = datestring

            encoded_checkpoint = new_checkpoint.encode('ascii', 'ignore')
            ew.log(modularinput.EventWriter.INFO,
                   "Moving checkpoint to {}".format(encoded_checkpoint))
            cpoint.set_checkpoint(input_name, encoded_checkpoint)

def stream_events(self, config, eventWriter):
    for input_name, input_item in config.inputs.iteritems():
        for i in range(1, 50):
            event = modularinput.Event()
            event.stanza = input_name
            event.data = "Hello {}".format(input_item["name_to_greet"])
            event.sourcetype = "hello_world"
            event.time = time.mktime(time.localtime())
            eventWriter.write_event(event)

def stream_events(self, inputs, ew):
    input_items = [{'count': len(inputs.inputs)}]
    for input_name, input_item in inputs.inputs.items():
        input_item['name'] = input_name
        input_items.append(input_item)
    event = smi.Event(
        data=json.dumps(input_items),
        sourcetype='example_input_one',
    )
    ew.write_event(event)

def collect_events(helper, ew):
    """overloaded splunklib modularinput method"""
    # use simple rest call to load the events
    command = 'curl --unix-socket /var/run/docker.sock http:/${docker_api_version}/containers/json?all=1'
    input_name = helper.get_input_type()
    stanzas = helper.get_input_stanza()
    for stanza_name, stanza_params in stanzas.iteritems():
        # should unescape the xml text content
        hparser = HTMLParser.HTMLParser()
        input_params = {}
        for k, v in stanza_params.iteritems():
            input_params[k] = '' if v is None else hparser.unescape(str(v))
        stanza_params = input_params
        output_index = stanza_params['index']
        output_sourcetype = stanza_params['sourcetype']

        # parametrization for the command
        temp = mako.template.Template(command)
        translated_command = temp.render(**stanza_params)

        child = subprocess.Popen(translated_command,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 shell=True)
        while child.poll() is None:
            content = child.stdout.read()
            if content:
                event = helper.new_event(source=input_name,
                                         index=output_index,
                                         data=content,
                                         unbroken=True,
                                         done=False)
                ew.write_event(event)

        # read the last part of events
        content = child.stdout.read()
        if content:
            event = smi.Event(source=input_name,
                              index=output_index,
                              sourcetype=output_sourcetype,
                              data=content,
                              unbroken=True,
                              done=True)
            ew.write_event(event)

        if child.returncode != 0:
            err_prefix = "Command exit code is {}. Failed to run command '{}'.".format(
                child.returncode, translated_command)
            errmsg = child.stderr.read()
            if errmsg:
                errmsg = err_prefix + " error message: " + errmsg
            else:
                errmsg = err_prefix
            raise RuntimeError(errmsg)

def _write_done_event(checkpoint=False):
    if State.writing:
        logger.log(logging.DEBUG, "create Event")
        event = smi.Event(source=reader.source_name,
                          data='',
                          unbroken=True,
                          done=True)
        logger.log(logging.DEBUG, "write_done_event")
        self._ew.write_event(event)
        logger.log(logging.DEBUG, "write_done_event done")
        State.writing = False
        if checkpoint:
            checkpoint_item.position = reader.position

def _stream_events(self, inputs, ew):
    """helper function"""
    loglevel = get_level('aws_config', self.service.token, appName=APPNAME)
    set_log_level(loglevel)
    logger.log(
        logging.INFO,
        "STARTED: {}".format(len(sys.argv) > 1 and sys.argv[1] or ''))
    logger.log(logging.DEBUG, "Start streaming.")
    self._ew = ew

    if os.name == 'nt':
        import win32api
        win32api.SetConsoleCtrlHandler(self._exit_handler, True)
    else:
        import signal
        signal.signal(signal.SIGTERM, self._exit_handler)
        signal.signal(signal.SIGINT, self._exit_handler)

    # because we only support one stanza...
    self.input_name, self.input_items = inputs.inputs.popitem()
    self.enable_additional_notifications = (
        self.input_items.get('enable_additional_notifications') or
        'false').lower() in ('1', 'true', 'yes', 'y', 'on')
    # self.configure_blacklist()

    base_sourcetype = self.input_items.get("sourcetype") or "aws:config"
    session_key = self.service.token
    key_id, secret_key = tac.get_aws_creds(self.input_items,
                                           inputs.metadata, {})

    # Try S3 Connection
    s3_conns = {}

    # Create SQS Connection
    sqs_conn = s3util.connect_sqs(self.input_items['aws_region'], key_id,
                                  secret_key, self.service.token)

    if sqs_conn is None:
        # No recovering from this...
        logger.log(
            logging.FATAL,
            "Invalid SQS Queue Region: {}".format(
                self.input_items['aws_region']))
        raise Exception("Invalid SQS Queue Region: {}".format(
            self.input_items['aws_region']))
    else:
        logger.log(logging.DEBUG, "Connected to SQS successfully")

    try:
        while not self._canceled:
            sqs_queue = s3util.get_queue(sqs_conn,
                                         self.input_items['sqs_queue'])
            if sqs_queue is None:
                try:
                    # verify it isn't an auth issue
                    sqs_queues = sqs_conn.get_all_queues()
                except boto.exception.SQSError as e:
                    logger.log(
                        logging.FATAL,
                        "sqs_conn.get_all_queues(): {} {}: {} - {}".format(
                            e.status, e.reason, e.error_code,
                            e.error_message))
                    raise
                else:
                    logger.log(
                        logging.FATAL,
                        "sqs_conn.get_queue(): Invalid SQS Queue Name: {}".format(
                            self.input_items['sqs_queue']))
                    break

            sqs_queue.set_message_class(boto.sqs.message.RawMessage)

            # num_messages=10 was chosen based on aws pricing faq.
            # see request batch pricing: http://aws.amazon.com/sqs/pricing/
            notifications = sqs_queue.get_messages(num_messages=10,
                                                   visibility_timeout=20,
                                                   wait_time_seconds=20)
            logger.log(
                logging.DEBUG,
                "Length of notifications in sqs=%s for region=%s is: %s" %
                (self.input_items['sqs_queue'],
                 self.input_items['aws_region'], len(notifications)))

            start_time = time.time()
            completed = []
            failed = []
            stats = {'written': 0}

            # if not notifications or self._canceled:
            #     continue

            # Exit if SQS returns nothing. Wake up on interval as specified on inputs.conf
            if len(notifications) == 0:
                self._canceled = True
                break

            for notification in notifications:
                if self._canceled or self._check_orphan():
                    break
                try:
                    envelope = json.loads(notification.get_body())
                # What do we do with non JSON data? Leave them in the queue but recommend customer uses a SQS queue only for AWS Config?
                except Exception as e:
                    failed.append(notification)
                    logger.log(
                        logging.ERROR,
                        "problems decoding notification JSON string: {} {}"
                        .format(type(e).__name__, e))
                    continue

                if not isinstance(envelope, dict):
                    failed.append(notification)
                    logger.log(
                        logging.ERROR,
                        "This doesn't look like a valid Config message. Please check SQS settings."
                    )
                    continue

                if all(key in envelope
                       for key in ("Type", "MessageId", "TopicArn",
                                   "Message")) and isinstance(
                                       envelope['Message'], basestring):
                    logger.log(logging.DEBUG,
                               "This is considered a Config notification.")
                    try:
                        envelope = json.loads(envelope['Message'])
                        if not isinstance(envelope, dict):
                            failed.append(notification)
                            logger.log(
                                logging.ERROR,
                                "This doesn't look like a valid Config message. Please check SQS settings."
                            )
                            continue
                    except Exception as e:
                        failed.append(notification)
                        logger.log(
                            logging.ERROR,
                            "problems decoding message JSON string: {} {}".format(
                                type(e).__name__, e))
                        continue

                if 'messageType' in envelope:
                    logger.log(
                        logging.DEBUG,
                        "This is considered a Config message. 'Raw Message Delivery' may be 'True'."
                    )
                    message = envelope
                else:
                    failed.append(notification)
                    logger.log(
                        logging.ERROR,
                        "This doesn't look like a valid Config message. Please check SQS settings."
                    )
                    continue

                ## Process: config notifications, history and snapshot notifications (additional)

                # Process notifications with payload, check ConfigurationItemChangeNotification
                msg_type = message.get('messageType', '')
                if msg_type == 'ConfigurationItemChangeNotification':
                    logger.log(
                        logging.DEBUG,
                        "Consuming configuration change data in SQS payload.")
                    # determine _time for the event
                    configurationItem = message.get('configurationItem', '')
                    configurationItemCaptureTime = configurationItem.get(
                        'configurationItemCaptureTime', '')
                    event_time = int(
                        calendar.timegm(
                            time.strptime(
                                configurationItemCaptureTime.replace("Z", "GMT"),
                                "%Y-%m-%dT%H:%M:%S.%f%Z")))
                    # write the event
                    event = smi.Event(data=json.dumps(message),
                                      time=event_time,
                                      sourcetype=base_sourcetype + ":notification")
                    ew.write_event(event)
                    stats['written'] += 1
                    completed.append(notification)

                # Process ConfigurationHistoryDeliveryCompleted notifications by fetching data from S3 buckets
                elif msg_type == 'ConfigurationHistoryDeliveryCompleted' and message.get(
                        's3ObjectKey', '') != '' and message.get('s3Bucket', '') != '':
                    logger.log(
                        logging.DEBUG,
                        "Consuming configuration history change data in S3 bucket.")
                    bucket_name = message.get('s3Bucket', '')
                    key = message.get('s3ObjectKey', '')
                    logger.log(
                        logging.INFO,
                        "Consume config history from s3 with s3Bucket '{0}' s3ObjectKey '{1}'"
                        .format(bucket_name, key))
                    completed_buf, failed_buf = self.process_confighistory(
                        s3_conns, key_id, secret_key, session_key,
                        notification, bucket_name, key)
                    completed.extend(completed_buf)
                    failed.extend(failed_buf)
                    logger.log(
                        logging.DEBUG,
                        "Length of completed after reaching into s3bucket: {0}"
                        .format(len(completed)))

                # Process ConfigurationSnapshotDeliveryCompleted notifications by fetching data from S3 buckets
                elif msg_type == 'ConfigurationSnapshotDeliveryCompleted' and message.get(
                        's3ObjectKey', '') != '' and message.get('s3Bucket', '') != '':
                    logger.log(
                        logging.DEBUG,
                        "Consuming configuration snapshot data in S3 bucket.")
                    bucket_name = message.get('s3Bucket', '')
                    key = message.get('s3ObjectKey', '')
                    logger.log(
                        logging.INFO,
                        "Consume config snapshot from s3 with s3Bucket '{0}' s3ObjectKey '{1}'"
                        .format(bucket_name, key))
                    completed_buf, failed_buf = self.process_confighistory(
                        s3_conns, key_id, secret_key, session_key,
                        notification, bucket_name, key)
                    completed.extend(completed_buf)
                    failed.extend(failed_buf)
                    logger.log(
                        logging.DEBUG,
                        "Length of completed after reaching into s3bucket: {0}"
                        .format(len(completed)))

                # Ingest all other notification of types: ConfigurationSnapshot* etc. but only when enable_additional_notifications is true.
                # elif self.enable_additional_notifications and msg_type.startswith("ConfigurationSnapshot"):
                #     logger.log(logging.DEBUG, "Consuming additional notifications enabled")
                #     notificationCreationTime = message.get('notificationCreationTime', '')
                #     event_time = int(calendar.timegm(time.strptime(notificationCreationTime.replace("Z", "GMT"), "%Y-%m-%dT%H:%M:%S.%f%Z")))
                #     # write the event
                #     event = smi.Event(data=json.dumps(message),
                #                       time=event_time,
                #                       sourcetype=base_sourcetype+":additional")
                #     ew.write_event(event)
                #     stats['written'] += 1
                #     completed.append(notification)

                elif msg_type in [
                        'ComplianceChangeNotification',
                        'ConfigurationSnapshotDeliveryStarted',
                        'ConfigRulesEvaluationStarted'
                ]:
                    logger.log(
                        logging.INFO,
                        'Ignore this message and delete the sqs messages.')
                    completed.append(notification)
                else:
                    failed.append(notification)
                    logger.log(
                        logging.ERROR,
                        "This doesn't look like a Config notification or message. Please check SQS settings."
                    )
                    continue

            notification_delete_errors = 0
            # Delete ingested notifications
            if completed:
                logger.log(
                    logging.INFO,
                    "Delete {0} completed messages from SQS".format(
                        len(completed)))
                br = sqs_queue.delete_message_batch(completed)
                if br.errors:
                    notification_delete_errors = len(br.errors)

            if failed:
                logger.log(logging.DEBUG,
                           "sqs_queue.delete_message_batch(failed)")
                logger.log(
                    logging.INFO,
                    "Delete {0} failed messages from SQS".format(len(failed)))
                br = sqs_queue.delete_message_batch(failed)
                logger.log(logging.DEBUG,
                           "sqs_queue.delete_message_batch done")
                if br.errors:
                    notification_delete_errors = len(br.errors)
                failed_messages = ','.join([m.get_body() for m in failed])
                logger.log(
                    logging.WARN,
                    "Invalid notifications have been removed from SQS : %s",
                    failed_messages)
            else:
                logger.log(logging.INFO, (
                    "{} completed, {} failed while processing a notification batch of {}"
                    " [{} errors deleting {} notifications]"
                    " Elapsed: {:.3f}s").format(
                        len(completed), len(failed), len(notifications),
                        notification_delete_errors, len(completed),
                        time.time() - start_time))
    except Exception as e:
        logger.log(logging.FATAL, "Outer catchall: %s: %s",
                   type(e).__name__, e)

def process_notifications(self, s3_conn, notifications):
    """Extract events from CloudTrail S3 logs referenced in SNS notifications."""
    completed = []
    failed = []
    keys_to_delete = {}

    for notification in notifications:
        if self._canceled:
            break
        try:
            envelope = json.loads(notification.get_body())
        except Exception as e:
            failed.append(notification)
            logger.log(
                logging.ERROR,
                "problems decoding notification JSON string: {} {}".format(
                    type(e).__name__, e))
            continue

        if not isinstance(envelope, dict):
            failed.append(notification)
            logger.log(
                logging.ERROR,
                "This doesn't look like a valid CloudTrail message. Please check SQS settings."
            )
            continue

        if all(key in envelope
               for key in ("Type", "MessageId", "TopicArn",
                           "Message")) and isinstance(
                               envelope['Message'], basestring):
            logger.log(logging.DEBUG,
                       "This is considered a CloudTrail notification.")
            try:
                envelope = json.loads(envelope['Message'])
                if not isinstance(envelope, dict):
                    failed.append(notification)
                    logger.log(
                        logging.ERROR,
                        "This doesn't look like a valid CloudTrail message. Please check SQS settings."
                    )
                    continue
            except Exception as e:
                failed.append(notification)
                logger.log(
                    logging.ERROR,
                    "problems decoding message JSON string: {} {}".format(
                        type(e).__name__, e))
                continue

        if all(key in envelope for key in ("s3Bucket", "s3ObjectKey")):
            logger.log(
                logging.DEBUG,
                "This is considered a CloudTrail message. 'Raw Message Delivery' may be 'True'."
            )
            message = envelope
        else:
            failed.append(notification)
            logger.log(
                logging.ERROR,
                "This doesn't look like a CloudTrail notification or message. Please check SQS settings."
            )
            continue

        try:
            # defer validation to minimize queries.
            bucket_name = message['s3Bucket']
            logger.log(logging.DEBUG, "s3_conn.get_bucket %s", bucket_name)
            s3_bucket = s3_conn.get_bucket(bucket_name, validate=False)
            logger.log(logging.DEBUG, "s3_conn.get_bucket done")
            for key in message['s3ObjectKey']:
                logger.log(logging.DEBUG, "s3_conn.get_key %s", key)
                s3_file = s3_bucket.get_key(key)
                logger.log(logging.DEBUG, "s3_conn.get_key done")
                if s3_file is None:
                    file_json = {}
                else:
                    logger.log(logging.DEBUG, "load gzip file")
                    with io.BytesIO(s3_file.read()) as bio:
                        with gzip.GzipFile(fileobj=bio) as gz:
                            file_json = json.loads(gz.read())
                    logger.log(logging.DEBUG, "load gzip file done")
        except boto.exception.S3ResponseError as e:
            # TODO: if e.error_code == 'NoSuchBucket' --- should we delete from queue also?
            # Or is this something that should be left for SQS Redrive?
            # We remove files from s3 before deleting the notifications, so it is possible in a fail
            # case to create this scenario.
            loglevel = logging.ERROR
            if e.status == 404 and e.reason == 'Not Found' and e.error_code in (
                    'NoSuchKey', ):
                completed.append(notification)
                loglevel = logging.WARN
            else:
                failed.append(notification)
            edetail = e.body
            if e.body:
                try:
                    elem = ET.fromstring(e.body)
                    edetail = elem.findtext('Key') or elem.findtext(
                        'BucketName') or ''
                except Exception:
                    logger.log(
                        logging.WARN,
                        "Failed to parse the content from S3ResponseError : {}"
                        .format(e.body))
            logger.log(
                loglevel, "{}: {} {}: {} - {}: {}".format(
                    type(e).__name__, e.status, e.reason, e.error_code,
                    e.error_message, edetail))
            continue
        except ValueError as e:
            failed.append(notification)
            logger.log(
                logging.ERROR,
                "problems reading json from s3:{}/{}: {} {}".format(
                    message['s3Bucket'], key, type(e).__name__, e))
            continue
        except IOError as e:
            failed.append(notification)
            logger.log(
                logging.ERROR,
                "problems unzipping from s3:{}/{}: {} {}".format(
                    message['s3Bucket'], key, type(e).__name__, e))
            continue

        try:
            records = file_json.get('Records', [])
            logger.log(
                logging.INFO,
                "processing {} records in s3:{}/{}".format(
                    len(records), bucket_name, key))
        except KeyError as e:
            failed.append(notification)
            logger.log(
                logging.ERROR,
                "json not in expected format from s3:{}/{}: {} {}".format(
                    bucket_name, key, type(e).__name__, e))
            continue

        stats = {'written': 0, 'redirected': 0, 'discarded': 0}
        try:
            for idx, record in enumerate(records):
                if self.blacklist_pattern is None or re.search(
                        self.blacklist_pattern, record["eventName"]) is None:
                    logger.log(
                        logging.DEBUG,
                        "writing event {} with timestamp {}".format(
                            record['eventName'], record['eventTime']))
                    event = smi.Event(
                        data=json.dumps(record),
                        time=int(
                            calendar.timegm(
                                time.strptime(
                                    record['eventTime'].replace("Z", "GMT"),
                                    "%Y-%m-%dT%H:%M:%S%Z"))),
                        source="s3://{}/{}".format(bucket_name, key))
                    self._ew.write_event(event)
                    stats['written'] += 1
                elif self.input_items.get('excluded_events_index'):
                    event = smi.Event(
                        data=json.dumps(record),
                        time=int(
                            calendar.timegm(
                                time.strptime(
                                    record['eventTime'].replace("Z", "GMT"),
                                    "%Y-%m-%dT%H:%M:%S%Z"))),
                        index=self.input_items['excluded_events_index'],
                        source="s3://{}/{}".format(bucket_name, key))
                    self._ew.write_event(event)
                    stats['redirected'] += 1
                else:
                    logger.log(
                        logging.DEBUG,
                        "blacklisted event"
                        " {} with timestamp {} being discarded".format(
                            record['eventName'], record['eventTime']))
                    stats['discarded'] += 1

            logger.log(logging.INFO, (
                "fetched {} records, wrote {}, discarded {}, redirected {}"
                " from s3:{}/{}").format(len(records), stats['written'],
                                         stats['discarded'],
                                         stats['redirected'], bucket_name,
                                         key))
            completed.append(notification)
        except IOError as e:
            if not self._canceled:
                failed.append(notification)
            if stats['written'] or stats['redirected']:
                logger.log(
                    logging.ERROR,
                    "likely future duplicates:"
                    " {}while processing record {} of {} for s3:{}/{}: {} {}".format(
                        "cancellation request received " if self._canceled else '',
                        idx + 1, len(records), bucket_name, key,
                        type(e).__name__, e))
            break
        else:
            if self.remove_files_when_done:
                if bucket_name not in keys_to_delete:
                    keys_to_delete[bucket_name] = []
                keys_to_delete[bucket_name].append(key)

    return completed, keys_to_delete, failed

def _stream_s3_item(self, key, checkpoint_item):
    """Handles streaming single S3 item to Splunk

    @param key: S3 key
    @param checkpoint_item:
    @return:
    """
    class State(object):
        writing = False

    def _start(reader_type):
        pass

    def _stop(reader_type, position):
        _write_done_event(checkpoint=True)

    def _write_done_event(checkpoint=False):
        if State.writing:
            logger.log(logging.DEBUG, "create Event")
            event = smi.Event(source=reader.source_name,
                              data='',
                              unbroken=True,
                              done=True)
            logger.log(logging.DEBUG, "write_done_event")
            self._ew.write_event(event)
            logger.log(logging.DEBUG, "write_done_event done")
            State.writing = False
            if checkpoint:
                checkpoint_item.position = reader.position

    try:
        retry_num = 0
        current_reader_pos = 0

        # If user set an encoding, use the user setting, or default to 'auto'. Used in incremental decoder.
        character_set = self.input_items.get('character_set') or 'auto'
        logger.log(logging.DEBUG, "character_set = %s", character_set)
        try:
            inc_decoder = codecs.getincrementaldecoder(character_set)(
                errors='replace')
        except LookupError:
            # If use 'auto' or encoding is invalid, auto decode utf8 with/without BOM, utf16/32 with BOM
            logger.log(
                logging.DEBUG,
                "character_set is auto or does not exist, try auto UTF detect")
            reader = taaws.s3readers.get_stream_reader(
                key, source_start_func=_start, source_stop_func=_stop)
            data = reader.read(1024)
            inc_decoder = codecs.getincrementaldecoder(
                detect_unicode_by_bom(data))(errors='replace')
            key.close(fast=True)

        while True:
            try:
                if self._canceled:
                    break
                logger.log(logging.DEBUG, "taaws.s3readers.get_stream_reader")
                reader = taaws.s3readers.get_stream_reader(
                    key, source_start_func=_start, source_stop_func=_stop)
                logger.log(logging.DEBUG,
                           "taaws.s3readers.get_stream_reader done")
                if current_reader_pos:
                    logger.log(logging.DEBUG, "reader.seek_position : %s",
                               str(current_reader_pos))
                    reader.seek_position(current_reader_pos)
                elif checkpoint_item.position:
                    logger.log(logging.DEBUG, "reader.seek_position : %s",
                               str(checkpoint_item.position))
                    reader.seek_position(checkpoint_item.position)
                logger.log(logging.DEBUG, "reader.seek_position done")

                while True:
                    if self._canceled:
                        break
                    data = reader.read(8192)
                    logger.log(logging.DEBUG, "reader.read(8192) done")
                    # If decoder is not initialized (meaning that this is the first loop with file header),
                    # initialize it with data
                    if inc_decoder is None:
                        inc_decoder = codecs.getincrementaldecoder(
                            detect_unicode_by_bom(data))(errors='replace')
                    if not data:
                        checkpoint_item.eof_reached = True
                        break
                    logger.log(logging.DEBUG, "create Event")
                    event = smi.Event(source=reader.source_name,
                                      data=inc_decoder.decode(data),
                                      unbroken=True,
                                      done=False)
                    logger.log(
                        logging.DEBUG,
                        "write event data length={} ".format(len(data)))
                    self._ew.write_event(event)
                    logger.log(logging.DEBUG, "write event data done")
                    current_reader_pos = reader.position
                    State.writing = True

                ## Adding by Gang to fix the dead loop bug
                if checkpoint_item.eof_reached:
                    break
            except IOError as e:
                # unchecked, it could also be stdout
                if self._canceled:
                    break
                retry_num += 1
                if retry_num > 3:
                    raise
                key.close(fast=True)
                logger.log(
                    logging.WARN,
                    "Will Retry Resume: bucket: %r key: %r etag: %s retry_resume_num: %d "
                    " checkpoint_pos: %d, current_reader_pos: %d Exception: %s: %s",
                    key.bucket.name, key.name, key.etag, retry_num,
                    checkpoint_item.position, current_reader_pos,
                    type(e).__name__, e)
                self._do_delay(2**(retry_num - 1))
    except Exception as e:
        raise
    finally:
        _write_done_event(checkpoint=False)
        if not self._canceled:
            checkpoint_item.attempt_count += 1

def stream_events(self, inputs, ew):
    """overloaded splunklib modularinput method"""
    logger.log(logging.DEBUG, "Start streaming.")
    self._ew = ew

    if os.name == 'nt':
        import win32api
        win32api.SetConsoleCtrlHandler(self._exit_handler, True)
    else:
        import signal
        signal.signal(signal.SIGTERM, self._exit_handler)
        signal.signal(signal.SIGINT, self._exit_handler)

    # because we only support one stanza...
    self.input_name, self.input_items = inputs.inputs.popitem()
    self.enable_additional_notifications = (
        self.input_items.get('enable_additional_notifications') or
        'false').lower() in ('1', 'true', 'yes', 'y', 'on')

    # self.configure_blacklist()
    # logger.log(logging.DEBUG, "blacklist regex for eventNames is {}".format(self.blacklist))

    session_key = self.service.token
    aws_account_name = self.input_items.get("aws_account") or "default"
    (key_id, secret_key) = self.get_access_key_pwd_real(
        session_key=session_key, aws_account_name=aws_account_name)

    # Try S3 Connection
    s3_conn = taaws.s3util.connect_s3(key_id, secret_key, session_key)

    # Create SQS Connection
    sqs_conn = taaws.s3util.connect_sqs(self.input_items['aws_region'],
                                        key_id, secret_key, session_key)

    if sqs_conn is None:
        # No recovering from this...
        logger.log(logging.FATAL, "Invalid SQS Queue Region: {}".format(
            self.input_items['aws_region']))
        raise Exception("Invalid SQS Queue Region: {}".format(
            self.input_items['aws_region']))
    else:
        logger.log(logging.DEBUG, "Connected to SQS successfully")

    try:
        while not self._canceled:
            # logger.log(logging.INFO, "The outer loop has started...")
            if self._canceled:
                break

            sqs_queue = sqs_conn.get_queue(self.input_items['sqs_queue'])
            if sqs_queue is None:
                try:
                    # verify it isn't an auth issue
                    sqs_queues = sqs_conn.get_all_queues()
                except boto.exception.SQSError as e:
                    logger.log(logging.FATAL,
                               "sqs_conn.get_all_queues(): {} {}: {} - {}".format(
                                   e.status, e.reason, e.error_code,
                                   e.error_message))
                    raise
                else:
                    logger.log(logging.FATAL,
                               "sqs_conn.get_queue(): Invalid SQS Queue Name: {}".format(
                                   self.input_items['sqs_queue']))
                    raise

            # sqs_queue.set_message_class(boto.sqs.message.RawMessage)
            sqs_queue.set_message_class(RawMessage)

            # num_messages=10 was chosen based on aws pricing faq.
            # see request batch pricing: http://aws.amazon.com/sqs/pricing/
            notifications = sqs_queue.get_messages(num_messages=10,
                                                   visibility_timeout=20,
                                                   wait_time_seconds=20)
            logger.log(logging.DEBUG,
                       "Length of notifications is: %s" % len(notifications))

            start_time = time.time()
            completed = []
            failed = []
            stats = {'written': 0}

            # if not notifications or self._canceled:
            # Exit if SQS returns nothing. Wake up on interval as specified on inputs.conf
            if len(notifications) == 0:
                self._canceled = True
                break

            for notification in notifications:
                if self._canceled:
                    break
                try:
                    message = notification.get_body()
                    logger.log(logging.DEBUG, message)
                    event = smi.Event((message),
                                      source="aws:evidentio:notification")
                    ew.write_event(event)
                    stats['written'] += 1
                    completed.append(notification)
                # What do we do with non JSON data? Leave them in the queue but recommend customer uses a SQS queue only for AWS Evident.io?
                except Exception as e:
                    failed.append(notification)
                    logger.log(logging.ERROR,
                               "Problems decoding JSON in notification: {} {}.".format(
                                   type(e).__name__, e))
                    continue

            notification_delete_errors = 0
            # Delete ingested notifications
            if completed:
                br = sqs_queue.delete_message_batch(completed)
                if br.errors:
                    notification_delete_errors = len(br.errors)
            else:
                logger.log(logging.INFO, (
                    "{} completed, {} failed while processing a notification batch of {}"
                    " [{} errors deleting {} notifications]"
                    " Elapsed: {:.3f}s").format(
                        len(completed), len(failed), len(notifications),
                        notification_delete_errors, len(completed),
                        time.time() - start_time))
    except Exception as e:
        logger.log(logging.FATAL, "Outer catchall: %s: %s",
                   type(e).__name__, e)
        raise

def process_confighistory(self, s3_conns, key_id, secret_key, session_key,
                          notification, bucket_name, key):
    """Extract events from AWS Config S3 logs referenced in SNS notifications."""
    completed = []
    failed = []
    file_json = {}

    try:
        # defer validation to minimize queries.
        if bucket_name not in s3_conns:
            s3_conns[bucket_name] = _create_s3_connection(
                key_id, secret_key, session_key, bucket_name, key)
        s3_bucket = s3_conns[bucket_name].get_bucket(bucket_name)
        s3_file = s3_bucket.get_key(key)
        if s3_file is not None:
            with io.BytesIO(s3_file.read()) as bio:
                with gzip.GzipFile(fileobj=bio) as gz:
                    file_json = json.loads(gz.read())
        else:
            logger.log(logging.WARN, "S3 key not found",
                       bucket=bucket_name, key=key)
    except boto.exception.S3ResponseError as e:
        # TODO: if e.error_code == 'NoSuchBucket' --- should we delete from queue also?
        # Or is this something that should be left for SQS Redrive?
        loglevel = logging.ERROR
        if e.status == 404 and e.reason == 'Not Found' and e.error_code in (
                'NoSuchKey', ):
            completed.append(notification)
            loglevel = logging.WARN
        else:
            failed.append(notification)
        edetail = e.body
        if e.body:
            try:
                elem = ET.fromstring(e.body)
                edetail = elem.findtext('Key') or elem.findtext(
                    'BucketName') or ''
            except Exception:
                logger.log(
                    logging.WARN,
                    "Failed to parse the content from S3ResponseError : {}"
                    .format(e.body))
        logger.log(
            loglevel, "{}: {} {}: {} - {}: {} {}".format(
                type(e).__name__, e.status, e.reason, e.error_code, e,
                e.error_message, edetail))
    except ValueError as e:
        failed.append(notification)
        logger.log(
            logging.ERROR,
            "Problems reading json from s3:{}/{}: {} {}".format(
                bucket_name, key, type(e).__name__, e))
    except IOError as e:
        failed.append(notification)
        logger.log(
            logging.ERROR,
            "Problems unzipping from s3:{}/{}: {} {}".format(
                bucket_name, key, type(e).__name__, e))

    try:
        configurationItems = file_json.get('configurationItems', [])
        logger.log(
            logging.INFO,
            "Processing {} configurationItems in s3:{}/{}".format(
                len(configurationItems), bucket_name, key))
    except KeyError as e:
        failed.append(notification)
        logger.log(
            logging.ERROR,
            "JSON not in expected format from s3:{}/{}: {} {}".format(
                bucket_name, key, type(e).__name__, e))

    stats = {'written': 0}
    source = os.path.basename(key)

    # Extract payload elements from history files
    try:
        for configurationItem in configurationItems:
            configurationItemCaptureTime = configurationItem.get(
                'configurationItemCaptureTime', '')
            event_time = int(
                calendar.timegm(
                    time.strptime(
                        configurationItemCaptureTime.replace("Z", "GMT"),
                        "%Y-%m-%dT%H:%M:%S.%f%Z")))
            # write the event
            event = smi.Event(data=json.dumps(configurationItem),
                              time=event_time,
                              source=source)
            self._ew.write_event(event)
            stats['written'] += 1

        logger.log(logging.INFO,
                   ("Fetched {} configurationItems, wrote {}"
                    " from s3:{}/{}").format(len(configurationItems),
                                             stats['written'], bucket_name,
                                             key))
        completed.append(notification)
    except IOError as e:
        if not self._canceled:
            failed.append(notification)

    return completed, failed

def _flush(self):
    # nothing to do?
    if not len(self._buffer) and not len(self._mqs_pending_flush):
        return

    start_time = time.time()
    total_stats = 0
    total_events = 0

    try:
        try:
            for buffer_key, buffer_data in sorted(self._buffer.items()):
                if not buffer_data:
                    # this should never happen. right?
                    logger.log(
                        logging.ERROR,
                        "empty data for buffer_key: {}".format(buffer_key))
                    continue

                mqs, stat = buffer_data[0]
                splunk_time = self._get_splunk_utctimestamp(
                    stat.get('Timestamp'))
                splunk_index = mqs.index
                splunk_host = mqs.host
                splunk_source = self._build_splunk_source(
                    mqs.aws_region, mqs.metric_namespace)

                total_stats += len(buffer_data)

                for buffer_data_chunk in tuple(
                        self._chunkify(buffer_data, self._multikv_chunksize)):
                    event_data = "{}\n{}".format(
                        self.METRIC_DATA_HEADER,
                        '\n'.join([
                            self._build_multikv_line(mqs, stat)
                            for mqs, stat in buffer_data_chunk
                        ]))
                    logger.log(logging.DEBUG, "PerfLog = EventStart")
                    event = smi.Event(time=splunk_time,
                                      index=splunk_index,
                                      host=splunk_host,
                                      source=splunk_source,
                                      data=event_data,
                                      sourcetype=mqs.sourcetype,
                                      unbroken=False)
                    logger.log(
                        logging.DEBUG,
                        "PerfLog = EventEnd, EventNum = {} ".format(
                            len(buffer_data_chunk)))
                    total_events += 1
                    logger.log(logging.DEBUG, "write_event")
                    self._ew.write_event(event)
                    logger.log(logging.DEBUG, "write_event done")

            # checkpoint
            for mqs, qw_endtime in self._mqs_pending_flush:
                if qw_endtime:
                    mqs.previous_query_endtime = qw_endtime
        except:
            raise
        finally:
            total_metrics = len(self._mqs_pending_flush)
            # deque.extend() isn't documented as thread-safe
            # see: http://bugs.python.org/issue15329
            # self._wait_queue.extend([mqs for mqs, qw_endtime in self._mqs_pending_flush])
            for mqs, qw_endtime in self._mqs_pending_flush:
                self._wait_queue.append(mqs)
            self._buffer.clear()
            del self._mqs_pending_flush[:]
            self._checkpointer.save()
    except Exception as e:
        # flush failed
        logger.log(logging.ERROR, "%s", e)
    else:
        logger.log(
            logging.INFO,
            "Flush/Checkpoint completed: total_events: {}"
            " total_metrics: {} total_stats: {}"
            " in {:.3f}s".format(total_events, total_metrics, total_stats,
                                 time.time() - start_time))

def write_event(self, e, ew):
    # sys.stdout.write("%s\n" % json.dumps(e))
    # sys.stdout.flush()
    event = smi.Event(data=json.dumps(e))
    ew.write_event(event)