def on_message(self, body, message):
    # print("RECEIVED MESSAGE: %r" % (body, ))
    try:
        # default elastic search metadata for an event
        metadata = {
            'index': 'events',
            'doc_type': 'event',
            'id': None
        }
        # just to be safe..check what we were sent.
        if isinstance(body, dict):
            bodyDict = body
        elif isinstance(body, str) or isinstance(body, unicode):
            try:
                bodyDict = json.loads(body)  # let's assume it's json
            except ValueError as e:
                # not json..ack but log the message
                logger.error("esworker exception: unknown body type received %r" % body)
                # message.ack()
                return
        else:
            logger.error("esworker exception: unknown body type received %r" % body)
            # message.ack()
            return

        if 'customendpoint' in bodyDict.keys() and bodyDict['customendpoint']:
            # custom document
            # send to plugins to allow them to modify it if needed
            (normalizedDict, metadata) = sendEventToPlugins(bodyDict, metadata, pluginList)
        else:
            # normalize the dict to the mozdef events standard
            normalizedDict = keyMapping(bodyDict)

            # send to plugins to allow them to modify it if needed
            if normalizedDict is not None and isinstance(normalizedDict, dict) and normalizedDict.keys():
                (normalizedDict, metadata) = sendEventToPlugins(normalizedDict, metadata, pluginList)

        # drop the message if a plugin set it to None, signaling a discard
        if normalizedDict is None:
            # message.ack()
            return

        # make a json version for posting to elastic search
        jbody = json.JSONEncoder().encode(normalizedDict)

        if isCEF(normalizedDict):
            # cef records have their doc_type set to the 'deviceproduct' field value.
            metadata['doc_type'] = 'cef'
            if 'details' in normalizedDict.keys() and 'deviceproduct' in normalizedDict['details'].keys():
                # don't create strange doc types..
                if ' ' not in normalizedDict['details']['deviceproduct'] and '.' not in normalizedDict['details']['deviceproduct']:
                    metadata['doc_type'] = normalizedDict['details']['deviceproduct']

        try:
            bulk = False
            if options.esbulksize != 0:
                bulk = True
            self.esConnection.save_event(
                index=metadata['index'],
                doc_id=metadata['id'],
                doc_type=metadata['doc_type'],
                body=jbody,
                bulk=bulk
            )
        except (ElasticsearchBadServer, ElasticsearchInvalidIndex) as e:
            # handle loss of server or race condition with index rotation/creation/aliasing
            try:
                self.esConnection = esConnect()
                # message.requeue()
                return
            except kombu.exceptions.MessageStateError:
                # state may be already set.
                return
        except ElasticsearchException as e:
            # exception target for queue capacity issues reported by elastic search,
            # so catch the error, report it and retry the message
            try:
                logger.exception('ElasticSearchException: {0} reported while indexing event'.format(e))
                # message.requeue()
                return
            except kombu.exceptions.MessageStateError:
                # state may be already set.
                return
        # message.ack()
    except Exception as e:
        logger.exception(e)
        logger.error('Malformed message body: %r' % body)
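
# A minimal sketch (an assumption about the plugin contract, not a confirmed MozDef
# API) of what sendEventToPlugins() above relies on: each plugin gets the event dict
# and the elasticsearch metadata dict, may rewrite either, and can return None for
# the event to signal the worker to discard the message.
class ExamplePlugin(object):
    def onMessage(self, message, metadata):
        # hypothetical plugin: route firewall events to their own index
        if message.get('category') == 'firewall':
            metadata['index'] = 'firewall-events'
        return (message, metadata)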
def keyMapping(aDict):
    '''map common key/fields to a normalized structure,
       explicitly typed when possible to avoid schema changes for upstream consumers
       Special accommodations made for logstash, nxlog, beaver, heka and CEF
       Some shippers attempt to conform to logstash-style @fieldname convention.
       This strips the leading at symbol since it breaks some elastic search
       libraries like elasticutils.
    '''
    returndict = dict()

    returndict['source'] = 'cloudtrail'
    returndict['details'] = {}
    returndict['category'] = 'cloudtrail'
    returndict['processid'] = str(os.getpid())
    returndict['processname'] = sys.argv[0]
    returndict['severity'] = 'INFO'
    if 'sourceIPAddress' in aDict and 'eventName' in aDict and 'eventSource' in aDict:
        summary_str = "{0} performed {1} in {2}".format(
            aDict['sourceIPAddress'],
            aDict['eventName'],
            aDict['eventSource']
        )
        returndict['summary'] = summary_str

    if 'eventName' in aDict:
        # Uppercase first character
        aDict['eventName'] = aDict['eventName'][0].upper() + aDict['eventName'][1:]
        returndict['details']['eventVerb'] = CLOUDTRAIL_VERB_REGEX.findall(aDict['eventName'])[0]
        returndict['details']['eventReadOnly'] = (returndict['details']['eventVerb'] in ['Describe', 'Get', 'List'])

    # set the timestamp when we received it, i.e. now
    returndict['receivedtimestamp'] = toUTC(datetime.now()).isoformat()
    returndict['mozdefhostname'] = options.mozdefhostname
    try:
        for k, v in aDict.iteritems():
            k = removeAt(k).lower()

            if k == 'sourceip':
                returndict[u'details']['sourceipaddress'] = v

            elif k == 'sourceipaddress':
                returndict[u'details']['sourceipaddress'] = v

            elif k == 'facility':
                returndict[u'source'] = v

            elif k in ('eventsource',):
                returndict[u'hostname'] = v

            elif k in ('message', 'summary'):
                returndict[u'summary'] = toUnicode(v)

            elif k in ('payload',) and 'summary' not in aDict.keys():
                # special case for heka: if it sends payload as well as a summary,
                # keep both but move payload to the details section.
                returndict[u'summary'] = toUnicode(v)
            elif k in ('payload',):
                returndict[u'details']['payload'] = toUnicode(v)

            elif k in ('eventtime', 'timestamp', 'utctimestamp', 'date'):
                returndict[u'utctimestamp'] = toUTC(v).isoformat()
                returndict[u'timestamp'] = toUTC(v).isoformat()

            elif k in ('hostname', 'source_host', 'host'):
                returndict[u'hostname'] = toUnicode(v)

            elif k in ('tags',):
                if 'tags' not in returndict.keys():
                    returndict[u'tags'] = []
                if type(v) == list:
                    returndict[u'tags'] += v
                else:
                    if len(v) > 0:
                        returndict[u'tags'].append(v)

            # nxlog keeps the severity name in syslogseverity, everyone else should use severity or level.
            elif k in ('syslogseverity', 'severity', 'severityvalue', 'level', 'priority'):
                returndict[u'severity'] = toUnicode(v).upper()

            # note: a bare 'facility' key is consumed by the earlier branch above,
            # so only 'syslogfacility' reaches here
            elif k in ('facility', 'syslogfacility'):
                returndict[u'facility'] = toUnicode(v)

            elif k in ('pid', 'processid'):
                returndict[u'processid'] = toUnicode(v)

            # nxlog sets sourcename to the processname (i.e. sshd), everyone else should call it process name or pname
            elif k in ('pname', 'processname', 'sourcename', 'program'):
                returndict[u'processname'] = toUnicode(v)

            # the file, or source
            elif k in ('path', 'logger', 'file'):
                returndict[u'eventsource'] = toUnicode(v)

            elif k in ('type', 'eventtype', 'category'):
                returndict[u'category'] = toUnicode(v)

            # custom fields as a list/array
            elif k in ('fields', 'details'):
                if type(v) is not dict:
                    returndict[u'details'][u'message'] = v
                else:
                    if len(v) > 0:
                        for details_key, details_value in v.iteritems():
                            returndict[u'details'][details_key] = details_value

            # custom fields/details as a one off, not in an array
            # i.e. fields.something=value or details.something=value
            # move them to a dict for consistency in querying
            elif k.startswith('fields.') or k.startswith('details.'):
                newName = k.replace('fields.', '')
                newName = newName.lower().replace('details.', '')
                # add a dict to hold the details if it doesn't exist
                if 'details' not in returndict.keys():
                    returndict[u'details'] = dict()
                # add field with a special case for shippers that don't send details
                # in an array as int/floats/strings;
                # we let them dictate the data type with a field_datatype convention
                if newName.endswith('_int'):
                    returndict[u'details'][unicode(newName)] = int(v)
                elif newName.endswith('_float'):
                    returndict[u'details'][unicode(newName)] = float(v)
                else:
                    returndict[u'details'][unicode(newName)] = toUnicode(v)
            else:
                returndict[u'details'][k] = v

        if 'utctimestamp' not in returndict.keys():
            # default in case we don't find a reasonable timestamp
            returndict['utctimestamp'] = toUTC(datetime.now()).isoformat()

    except Exception as e:
        logger.exception(e)
        logger.error('Malformed message: %r' % aDict)

    return returndict
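
# Illustrative only: a trimmed, hypothetical CloudTrail record and the kind of
# normalized event keyMapping() above produces from it (values invented; eventVerb
# assumes CLOUDTRAIL_VERB_REGEX captures the leading verb of the camel-cased name).
sample = {
    'sourceIPAddress': '203.0.113.10',
    'eventName': 'DescribeInstances',
    'eventSource': 'ec2.amazonaws.com',
    'eventTime': '2017-01-01T12:00:00Z',
}
normalized = keyMapping(sample)
# normalized['summary']                   -> '203.0.113.10 performed DescribeInstances in ec2.amazonaws.com'
# normalized['details']['eventVerb']      -> 'Describe'
# normalized['details']['eventReadOnly']  -> True
# normalized['utctimestamp']              -> ISO8601 UTC rendering of eventTime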
def save_documents(self, documents):
    try:
        bulk(self.es_connection, documents)
    except BulkIndexError as e:
        logger.error("Error bulk indexing: " + str(e))
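
# Assuming bulk() and BulkIndexError come from elasticsearch.helpers, each entry in
# `documents` is a bulk action dict; a minimal sketch of building one
# (make_action is a hypothetical helper, not part of this module):
def make_action(event, index='events', doc_type='event'):
    # wrap a normalized event in the bulk-action envelope the helper expects
    return {
        '_index': index,
        '_type': doc_type,
        '_source': event,
    }

# usage: self.save_documents([make_action(e) for e in events])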
def keyMapping(aDict):
    '''map common key/fields to a normalized structure,
       explicitly typed when possible to avoid schema changes for upstream consumers
       Special accommodations made for logstash, nxlog, beaver, heka and CEF
       Some shippers attempt to conform to logstash-style @fieldname convention.
       This strips the leading at symbol since it breaks some elastic search
       libraries like elasticutils.
    '''
    returndict = dict()

    # uncomment to save the source event for debugging, or chain of custody/forensics
    # returndict['original'] = aDict

    # set the timestamp when we received it, i.e. now
    returndict['receivedtimestamp'] = toUTC(datetime.now()).isoformat()
    returndict['mozdefhostname'] = options.mozdefhostname
    returndict['details'] = {}
    try:
        for k, v in aDict.iteritems():
            k = removeAt(k).lower()

            if k in ('message', 'summary'):
                returndict[u'summary'] = toUnicode(v)

            if k in ('payload',) and 'summary' not in aDict.keys():
                # special case for heka: if it sends payload as well as a summary,
                # keep both but move payload to the details section.
                returndict[u'summary'] = toUnicode(v)
            elif k in ('payload',):
                returndict[u'details']['payload'] = toUnicode(v)

            if k in ('eventtime', 'timestamp', 'utctimestamp'):
                returndict[u'utctimestamp'] = toUTC(v).isoformat()
                returndict[u'timestamp'] = toUTC(v).isoformat()

            if k in ('hostname', 'source_host', 'host'):
                returndict[u'hostname'] = toUnicode(v)

            if k in ('tags',):
                if len(v) > 0:
                    returndict[u'tags'] = v

            # nxlog keeps the severity name in syslogseverity, everyone else should use severity or level.
            if k in ('syslogseverity', 'severity', 'severityvalue', 'level'):
                returndict[u'severity'] = toUnicode(v).upper()

            if k in ('facility', 'syslogfacility'):
                returndict[u'facility'] = toUnicode(v)

            if k in ('pid', 'processid'):
                returndict[u'processid'] = toUnicode(v)

            # nxlog sets sourcename to the processname (i.e. sshd), everyone else should call it process name or pname
            if k in ('pname', 'processname', 'sourcename'):
                returndict[u'processname'] = toUnicode(v)

            # the file, or source
            if k in ('path', 'logger', 'file'):
                returndict[u'eventsource'] = toUnicode(v)

            if k in ('type', 'eventtype', 'category'):
                returndict[u'category'] = toUnicode(v)

            # custom fields as a list/array
            if k in ('fields', 'details'):
                if type(v) is not dict:
                    returndict[u'details'][u'message'] = v
                else:
                    if len(v) > 0:
                        for details_key, details_value in v.iteritems():
                            returndict[u'details'][details_key] = details_value

            # custom fields/details as a one off, not in an array
            # i.e. fields.something=value or details.something=value
            # move them to a dict for consistency in querying
            if k.startswith('fields.') or k.startswith('details.'):
                newName = k.replace('fields.', '')
                newName = newName.lower().replace('details.', '')
                # add field with a special case for shippers that don't send details
                # in an array as int/floats/strings;
                # we let them dictate the data type with a field_datatype convention
                if newName.endswith('_int'):
                    returndict[u'details'][unicode(newName)] = int(v)
                elif newName.endswith('_float'):
                    returndict[u'details'][unicode(newName)] = float(v)
                else:
                    returndict[u'details'][unicode(newName)] = toUnicode(v)

            # nxlog windows log handling
            if 'Domain' in aDict.keys() and 'SourceModuleType' in aDict.keys():
                # nxlog parses all windows event fields very well
                # copy all fields to details
                returndict[u'details'][k] = v

        if 'utctimestamp' not in returndict.keys():
            # default in case we don't find a reasonable timestamp
            returndict['utctimestamp'] = toUTC(datetime.now()).isoformat()

    except Exception as e:
        logger.exception('Received exception while normalizing message: %r' % e)
        logger.error('Malformed message: %r' % aDict)
        return None

    return returndict
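
# Illustrative only (hypothetical field names): the field_datatype suffix convention
# handled above lets shippers force a type onto one-off details fields.
flat = {
    'details.srcport_int': '443',      # -> returndict['details']['srcport_int'] == 443
    'details.duration_float': '0.25',  # -> returndict['details']['duration_float'] == 0.25
    'details.action': 'allow',         # -> stored as unicode text
}
normalized = keyMapping(flat)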
def on_message(self, message):
    try:
        # default elastic search metadata for an event
        metadata = {
            'index': 'events',
            'doc_type': 'event',
            'id': None
        }
        event = {}

        event['receivedtimestamp'] = toUTC(datetime.now()).isoformat()
        event['mozdefhostname'] = self.options.mozdefhostname

        if 'tags' in event:
            event['tags'].extend([self.options.taskexchange])
        else:
            event['tags'] = [self.options.taskexchange]

        event['severity'] = 'INFO'

        # Set defaults
        event['processid'] = ''
        event['processname'] = ''
        event['category'] = 'syslog'

        for message_key, message_value in message.iteritems():
            if 'Message' == message_key:
                try:
                    message_json = json.loads(message_value)
                    for inside_message_key, inside_message_value in message_json.iteritems():
                        if inside_message_key in ('processid', 'pid'):
                            processid = str(inside_message_value)
                            processid = processid.replace('[', '')
                            processid = processid.replace(']', '')
                            event['processid'] = processid
                        elif inside_message_key in ('pname',):
                            event['processname'] = inside_message_value
                        elif inside_message_key in ('hostname',):
                            event['hostname'] = inside_message_value
                        elif inside_message_key in ('time', 'timestamp'):
                            event['timestamp'] = toUTC(inside_message_value).isoformat()
                            event['utctimestamp'] = toUTC(event['timestamp']).astimezone(pytz.utc).isoformat()
                        elif inside_message_key in ('type',):
                            event['category'] = inside_message_value
                        elif inside_message_key in ('payload', 'message'):
                            event['summary'] = inside_message_value
                        else:
                            if 'details' not in event:
                                event['details'] = {}
                            event['details'][inside_message_key] = inside_message_value
                except ValueError:
                    # the inner 'Message' was not JSON; keep it verbatim as the summary
                    event['summary'] = message_value
        (event, metadata) = sendEventToPlugins(event, metadata, self.pluginList)
        # Drop message if plugins set it to None
        if event is None:
            return
        self.save_event(event, metadata)
    except Exception as e:
        logger.exception(e)
        logger.error('Malformed message: %r' % message)
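
# Illustrative only: the shape of message this handler expects — an envelope whose
# 'Message' value is itself a JSON string (field names and values here are invented).
sample_message = {
    'Message': json.dumps({
        'hostname': 'host1.example.com',
        'pid': '[1234]',          # brackets are stripped above
        'pname': 'sshd',
        'time': '2017-01-01T12:00:00Z',
        'type': 'syslog',
        'message': 'Accepted publickey for jdoe',
    })
}
# worker.on_message(sample_message) would normalize this into a mozdef event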
def on_message(self, body):
    # print("RECEIVED MESSAGE: %r" % (body, ))
    try:
        # default elastic search metadata for an event
        metadata = {
            'index': 'events',
            'doc_type': 'cloudtrail',
            'id': None
        }
        # just to be safe..check what we were sent.
        if isinstance(body, dict):
            bodyDict = body
        elif isinstance(body, str) or isinstance(body, unicode):
            try:
                bodyDict = json.loads(body)  # let's assume it's json
            except ValueError as e:
                # not json..ack but log the message
                logger.error("Unknown body type received %r" % body)
                return
        else:
            logger.error("Unknown body type received %r\n" % body)
            return

        if 'customendpoint' in bodyDict.keys() and bodyDict['customendpoint']:
            # custom document
            # send to plugins to allow them to modify it if needed
            (normalizedDict, metadata) = sendEventToPlugins(bodyDict, metadata, pluginList)
        else:
            # normalize the dict to the mozdef events standard
            normalizedDict = keyMapping(bodyDict)

            # send to plugins to allow them to modify it if needed
            if normalizedDict is not None and isinstance(normalizedDict, dict) and normalizedDict.keys():
                (normalizedDict, metadata) = sendEventToPlugins(normalizedDict, metadata, pluginList)

        # drop the message if a plugin set it to None, signaling a discard
        if normalizedDict is None:
            return

        # This handles the fact that cloudtrail sends
        # inconsistent data types in the iamInstanceProfile field
        if 'details' in normalizedDict:
            if 'requestparameters' in normalizedDict['details']:
                if normalizedDict['details']['requestparameters'] is not None and 'iamInstanceProfile' in normalizedDict['details']['requestparameters']:
                    iam_instance_profile = normalizedDict['details']['requestparameters']['iamInstanceProfile']
                    if type(iam_instance_profile) is not dict:
                        normalizedDict['details']['requestparameters']['iamInstanceProfile'] = {
                            'name': iam_instance_profile
                        }

        # make a json version for posting to elastic search
        jbody = json.JSONEncoder().encode(normalizedDict)

        try:
            bulk = False
            if options.esbulksize != 0:
                bulk = True
            # note: bulk indexing is then unconditionally forced off here
            bulk = False
            self.esConnection.save_event(
                index=metadata['index'],
                doc_id=metadata['id'],
                doc_type=metadata['doc_type'],
                body=jbody,
                bulk=bulk
            )
        except (ElasticsearchBadServer, ElasticsearchInvalidIndex) as e:
            # handle loss of server or race condition with index rotation/creation/aliasing
            try:
                self.esConnection = esConnect()
                return
            except kombu.exceptions.MessageStateError:
                # state may be already set.
                return
        except ElasticsearchException as e:
            # exception target for queue capacity issues reported by elastic search,
            # so catch the error, report it and retry the message
            try:
                logger.exception('ElasticSearchException: {0} reported while indexing event'.format(e))
                return
            except kombu.exceptions.MessageStateError:
                # state may be already set.
                return
    except Exception as e:
        logger.exception(e)
        logger.error('Malformed message: %r' % body)
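
# Illustrative only: the iamInstanceProfile coercion above turns the string form
# CloudTrail sometimes sends into the dict form, so both index consistently.
# before: {'details': {'requestparameters': {'iamInstanceProfile': 'my-profile'}}}
# after:  {'details': {'requestparameters': {'iamInstanceProfile': {'name': 'my-profile'}}}}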
def getQueueSizes():
    logger.debug('starting')
    logger.debug(options)
    es = ElasticsearchClient(options.esservers)
    sqslist = {}
    sqslist['queue_stats'] = {}
    qcount = len(options.taskexchange)
    qcounter = qcount - 1

    try:
        # meant only to talk to SQS using boto and return queue attributes
        mqConn = boto.sqs.connect_to_region(
            options.region,
            aws_access_key_id=options.accesskey,
            aws_secret_access_key=options.secretkey
        )

        while qcounter >= 0:
            for exchange in options.taskexchange:
                logger.debug('Looking for sqs queue stats in queue' + exchange)
                eventTaskQueue = mqConn.get_queue(exchange)
                # get queue stats
                taskQueueStats = eventTaskQueue.get_attributes('All')
                sqslist['queue_stats'][qcounter] = taskQueueStats
                sqslist['queue_stats'][qcounter]['name'] = exchange
                qcounter -= 1
    except Exception as e:
        logger.error("Exception %r when gathering health and status " % e)

    # setup a log entry for health/status.
    sqsid = '{0}-{1}'.format(options.account, options.region)
    healthlog = dict(
        utctimestamp=toUTC(datetime.now()).isoformat(),
        hostname=sqsid,
        processid=os.getpid(),
        processname=sys.argv[0],
        severity='INFO',
        summary='mozdef health/status',
        category='mozdef',
        source='aws-sqs',
        tags=[],
        details=[]
    )
    healthlog['details'] = dict(username='******')
    healthlog['details']['queues'] = list()
    healthlog['details']['total_messages_ready'] = 0
    healthlog['details']['total_feeds'] = qcount
    healthlog['tags'] = ['mozdef', 'status', 'sqs']
    ready = 0
    qcounter = qcount - 1
    for q in sqslist['queue_stats'].keys():
        queuelist = sqslist['queue_stats'][qcounter]
        if 'ApproximateNumberOfMessages' in queuelist:
            ready1 = int(queuelist['ApproximateNumberOfMessages'])
            ready = ready1 + ready
            healthlog['details']['total_messages_ready'] = ready
        if 'ApproximateNumberOfMessages' in queuelist:
            messages = int(queuelist['ApproximateNumberOfMessages'])
        if 'ApproximateNumberOfMessagesNotVisible' in queuelist:
            inflight = int(queuelist['ApproximateNumberOfMessagesNotVisible'])
        if 'ApproximateNumberOfMessagesDelayed' in queuelist:
            delayed = int(queuelist['ApproximateNumberOfMessagesDelayed'])
        if 'name' in queuelist:
            name = queuelist['name']
        queueinfo = dict(
            queue=name,
            messages_delayed=delayed,
            messages_ready=messages,
            messages_inflight=inflight
        )
        healthlog['details']['queues'].append(queueinfo)
        qcounter -= 1

    # post to elasticsearch servers directly without going through
    # message queues in case there is an availability issue
    es.save_event(index=options.index, doc_type='mozdefhealth', body=json.dumps(healthlog))
    # post another doc with a static docid and tag
    # for use when querying for the latest sqs status
    healthlog['tags'] = ['mozdef', 'status', 'sqs-latest']
    es.save_event(index=options.index, doc_type='mozdefhealth', doc_id=getDocID(sqsid), body=json.dumps(healthlog))
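
# For reference: get_attributes('All') returns a dict of SQS attributes; the loop
# above reads the counters below (attribute names are real SQS attributes, the
# values here are illustrative).
# {
#     'ApproximateNumberOfMessages': '42',            # ready
#     'ApproximateNumberOfMessagesNotVisible': '3',   # in flight
#     'ApproximateNumberOfMessagesDelayed': '0',      # delayed
#     ...
# }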
def run(self):
    # Boto expects base64 encoded messages - but if the writer is not boto,
    # the body is not necessarily base64 encoded.
    # Thus we have to detect that and decode or not decode accordingly.
    self.taskQueue.set_message_class(RawMessage)

    while True:
        try:
            records = self.taskQueue.get_messages(options.prefetch)
            for msg in records:
                # msg.id is the id,
                # get_body() should be json

                # pre process the message a bit
                tmp = msg.get_body()
                try:
                    msgbody = json.loads(tmp)
                except ValueError:
                    # If Boto wrote to the queue, it might be base64 encoded, so let's decode that
                    try:
                        tmp = base64.b64decode(tmp)
                        msgbody = json.loads(tmp)
                    except Exception as e:
                        logger.error('Invalid message, not JSON <dropping message and continuing>: %r' % msg.get_body())
                        self.taskQueue.delete_message(msg)
                        continue

                # If this is still not a dict,
                # let's just drop the message and move on
                if type(msgbody) is not dict:
                    logger.debug("Message is not a dictionary, dropping message.")
                    self.taskQueue.delete_message(msg)
                    continue

                event = dict()
                event = msgbody

                # Was this message sent by fluentd-sqs?
                fluentd_sqs_specific_fields = {'az', 'instance_id', '__tag'}
                if fluentd_sqs_specific_fields.issubset(set(msgbody.keys())):
                    # Until we can influence fluentd-sqs to set the
                    # 'customendpoint' key before submitting to SQS, we'll
                    # need to do it here
                    # TODO : Change nubis fluentd output to include 'customendpoint'
                    event['customendpoint'] = True

                if 'tags' in event:
                    event['tags'].extend([options.taskexchange])
                else:
                    event['tags'] = [options.taskexchange]

                # process message
                self.on_message(event, msg)

                # delete message from queue
                self.taskQueue.delete_message(msg)
            time.sleep(.1)

        except ValueError as e:
            logger.exception('Exception while handling message: %r' % e)
            self.taskQueue.delete_message(msg)
        except (SSLEOFError, SSLError, socket.error):
            logger.info('Received network related error...reconnecting')
            time.sleep(5)
            self.connection, self.taskQueue = connect_sqs(
                options.region,
                options.accesskey,
                options.secretkey,
                options.taskexchange
            )
            self.taskQueue.set_message_class(RawMessage)
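
# A minimal, self-contained sketch of the decode strategy used in run() above,
# factored out for clarity (decode_sqs_body is a hypothetical helper, not part of
# this worker):
import base64
import json

def decode_sqs_body(raw):
    """Parse an SQS body: try plain JSON first, then base64-wrapped JSON."""
    try:
        return json.loads(raw)
    except ValueError:
        # boto-written bodies may be base64 encoded
        return json.loads(base64.b64decode(raw))

# usage: msgbody = decode_sqs_body(msg.get_body())  # raises on non-JSON bodies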