def keyMapping(aDict): '''map common key/fields to a normalized structure, explicitly typed when possible to avoid schema changes for upsteam consumers Special accomodations made for logstash,nxlog, beaver, heka and CEF Some shippers attempt to conform to logstash-style @fieldname convention. This strips the leading at symbol since it breaks some elastic search libraries like elasticutils. ''' returndict = dict() returndict['source'] = 'cloudtrail' returndict['details'] = {} returndict['category'] = 'cloudtrail' returndict['processid'] = str(os.getpid()) returndict['processname'] = sys.argv[0] returndict['severity'] = 'INFO' if 'sourceIPAddress' in aDict and 'eventName' in aDict and 'eventSource' in aDict: summary_str = "{0} performed {1} in {2}".format( aDict['sourceIPAddress'], aDict['eventName'], aDict['eventSource']) returndict['summary'] = summary_str if 'eventName' in aDict: # Uppercase first character aDict['eventName'] = aDict['eventName'][0].upper( ) + aDict['eventName'][1:] returndict['details']['eventVerb'] = CLOUDTRAIL_VERB_REGEX.findall( aDict['eventName'])[0] returndict['details']['eventReadOnly'] = ( returndict['details']['eventVerb'] in ['Describe', 'Get', 'List']) # set the timestamp when we received it, i.e. now returndict['receivedtimestamp'] = toUTC(datetime.now()).isoformat() returndict['mozdefhostname'] = options.mozdefhostname try: for k, v in aDict.iteritems(): k = removeAt(k).lower() if k == 'sourceip': returndict[u'details']['sourceipaddress'] = v elif k == 'sourceipaddress': returndict[u'details']['sourceipaddress'] = v elif k in ('facility', 'source'): returndict[u'source'] = v elif k in ('eventsource'): returndict[u'hostname'] = v elif k in ('message', 'summary'): returndict[u'summary'] = toUnicode(v) elif k in ('payload') and 'summary' not in aDict: # special case for heka if it sends payload as well as a summary, keep both but move payload to the details section. returndict[u'summary'] = toUnicode(v) elif k in ('payload'): returndict[u'details']['payload'] = toUnicode(v) elif k in ('eventtime', 'timestamp', 'utctimestamp', 'date'): returndict[u'utctimestamp'] = toUTC(v).isoformat() returndict[u'timestamp'] = toUTC(v).isoformat() elif k in ('hostname', 'source_host', 'host'): returndict[u'hostname'] = toUnicode(v) elif k in ('tags'): if 'tags' not in returndict: returndict[u'tags'] = [] if type(v) == list: returndict[u'tags'] += v else: if len(v) > 0: returndict[u'tags'].append(v) # nxlog keeps the severity name in syslogseverity,everyone else should use severity or level. elif k in ('syslogseverity', 'severity', 'severityvalue', 'level', 'priority'): returndict[u'severity'] = toUnicode(v).upper() elif k in ('facility', 'syslogfacility'): returndict[u'facility'] = toUnicode(v) elif k in ('pid', 'processid'): returndict[u'processid'] = toUnicode(v) # nxlog sets sourcename to the processname (i.e. sshd), everyone else should call it process name or pname elif k in ('pname', 'processname', 'sourcename', 'program'): returndict[u'processname'] = toUnicode(v) # the file, or source elif k in ('path', 'logger', 'file'): returndict[u'eventsource'] = toUnicode(v) elif k in ('type', 'eventtype', 'category'): returndict[u'category'] = toUnicode(v) # custom fields as a list/array elif k in ('fields', 'details'): if type(v) is not dict: returndict[u'details'][u'message'] = v else: if len(v) > 0: for details_key, details_value in v.iteritems(): returndict[u'details'][details_key] = details_value # custom fields/details as a one off, not in an array # i.e. fields.something=value or details.something=value # move them to a dict for consistency in querying elif k.startswith('fields.') or k.startswith('details.'): newName = k.replace('fields.', '') newName = newName.lower().replace('details.', '') # add a dict to hold the details if it doesn't exist if 'details' not in returndict: returndict[u'details'] = dict() # add field with a special case for shippers that # don't send details # in an array as int/floats/strings # we let them dictate the data type with field_datatype # convention if newName.endswith('_int'): returndict[u'details'][unicode(newName)] = int(v) elif newName.endswith('_float'): returndict[u'details'][unicode(newName)] = float(v) else: returndict[u'details'][unicode(newName)] = toUnicode(v) else: returndict[u'details'][k] = v if 'utctimestamp' not in returndict: # default in case we don't find a reasonable timestamp returndict['utctimestamp'] = toUTC(datetime.now()).isoformat() except Exception as e: logger.exception(e) logger.error('Malformed message: %r' % aDict) return returndict
def keyMapping(aDict): '''map common key/fields to a normalized structure, explicitly typed when possible to avoid schema changes for upsteam consumers Special accomodations made for logstash,nxlog, beaver, heka and CEF Some shippers attempt to conform to logstash-style @fieldname convention. This strips the leading at symbol since it breaks some elastic search libraries like elasticutils. ''' returndict = dict() # uncomment to save the source event for debugging, or chain of custody/forensics # returndict['original']=aDict # set the timestamp when we received it, i.e. now returndict['receivedtimestamp'] = toUTC(datetime.now()).isoformat() returndict['mozdefhostname'] = options.mozdefhostname returndict[u'details'] = {} try: for k, v in aDict.iteritems(): k = removeAt(k).lower() if k == 'sourceip': returndict[u'details']['eventsourceipaddress'] = v if k in ('facility', 'source'): returndict[u'source'] = v if k in ('message', 'summary'): returndict[u'summary'] = toUnicode(v) if k in ('payload') and 'summary' not in aDict: # special case for heka if it sends payload as well as a summary, keep both but move payload to the details section. returndict[u'summary'] = toUnicode(v) elif k in ('payload'): returndict[u'details']['payload'] = toUnicode(v) if k in ('eventtime', 'timestamp', 'utctimestamp', 'date'): returndict[u'utctimestamp'] = toUTC(v).isoformat() returndict[u'timestamp'] = toUTC(v).isoformat() if k in ('hostname', 'source_host', 'host'): returndict[u'hostname'] = toUnicode(v) if k in ('tags'): if 'tags' not in returndict: returndict[u'tags'] = [] if type(v) == list: returndict[u'tags'] += v else: if len(v) > 0: returndict[u'tags'].append(v) # nxlog keeps the severity name in syslogseverity,everyone else should use severity or level. if k in ('syslogseverity', 'severity', 'severityvalue', 'level', 'priority'): returndict[u'severity'] = toUnicode(v).upper() if k in ('facility', 'syslogfacility'): returndict[u'facility'] = toUnicode(v) if k in ('pid', 'processid'): returndict[u'processid'] = toUnicode(v) # nxlog sets sourcename to the processname (i.e. sshd), everyone else should call it process name or pname if k in ('pname', 'processname', 'sourcename', 'program'): returndict[u'processname'] = toUnicode(v) # the file, or source if k in ('path', 'logger', 'file'): returndict[u'eventsource'] = toUnicode(v) if k in ('type', 'eventtype', 'category'): returndict[u'category'] = toUnicode(v) # custom fields as a list/array if k in ('fields', 'details'): if type(v) is not dict: returndict[u'details'][u'message'] = v else: if len(v) > 0: for details_key, details_value in v.iteritems(): returndict[u'details'][details_key] = details_value # custom fields/details as a one off, not in an array # i.e. fields.something=value or details.something=value # move them to a dict for consistency in querying if k.startswith('fields.') or k.startswith('details.'): newName = k.replace('fields.', '') newName = newName.lower().replace('details.', '') # add field with a special case for shippers that # don't send details # in an array as int/floats/strings # we let them dictate the data type with field_datatype # convention if newName.endswith('_int'): returndict[u'details'][unicode(newName)] = int(v) elif newName.endswith('_float'): returndict[u'details'][unicode(newName)] = float(v) else: returndict[u'details'][unicode(newName)] = toUnicode(v) # nxlog windows log handling if 'Domain' in aDict and 'SourceModuleType' in aDict: # nxlog parses all windows event fields very well # copy all fields to details returndict[u'details'][k] = v if 'utctimestamp' not in returndict: # default in case we don't find a reasonable timestamp returndict['utctimestamp'] = toUTC(datetime.now()).isoformat() except Exception as e: logger.exception('Received exception while normalizing message: %r' % e) logger.error('Malformed message: %r' % aDict) return None return returndict
def keyMapping(aDict): '''map common key/fields to a normalized structure, explicitly typed when possible to avoid schema changes for upsteam consumers Special accomodations made for logstash,nxlog, beaver, heka and CEF Some shippers attempt to conform to logstash-style @fieldname convention. This strips the leading at symbol since it breaks some elastic search libraries like elasticutils. ''' returndict = dict() # uncomment to save the source event for debugging, or chain of custody/forensics # returndict['original']=aDict # set the timestamp when we received it, i.e. now returndict['receivedtimestamp'] = toUTC(datetime.now()).isoformat() returndict['mozdefhostname'] = options.mozdefhostname returndict[u'details'] = {} try: for k, v in aDict.iteritems(): k = removeAt(k).lower() if k == 'sourceip': returndict[u'details']['eventsourceipaddress'] = v if k == 'facility': returndict[u'source'] = v if k in ('message', 'summary'): returndict[u'summary'] = toUnicode(v) if k in ('payload') and 'summary' not in aDict.keys(): # special case for heka if it sends payload as well as a summary, keep both but move payload to the details section. returndict[u'summary'] = toUnicode(v) elif k in ('payload'): returndict[u'details']['payload'] = toUnicode(v) if k in ('eventtime', 'timestamp', 'utctimestamp', 'date'): returndict[u'utctimestamp'] = toUTC(v).isoformat() returndict[u'timestamp'] = toUTC(v).isoformat() if k in ('hostname', 'source_host', 'host'): returndict[u'hostname'] = toUnicode(v) if k in ('tags'): if 'tags' not in returndict.keys(): returndict[u'tags'] = [] if type(v) == list: returndict[u'tags'] += v else: if len(v) > 0: returndict[u'tags'].append(v) # nxlog keeps the severity name in syslogseverity,everyone else should use severity or level. if k in ('syslogseverity', 'severity', 'severityvalue', 'level', 'priority'): returndict[u'severity'] = toUnicode(v).upper() if k in ('facility', 'syslogfacility'): returndict[u'facility'] = toUnicode(v) if k in ('pid', 'processid'): returndict[u'processid'] = toUnicode(v) # nxlog sets sourcename to the processname (i.e. sshd), everyone else should call it process name or pname if k in ('pname', 'processname', 'sourcename', 'program'): returndict[u'processname'] = toUnicode(v) # the file, or source if k in ('path', 'logger', 'file'): returndict[u'eventsource'] = toUnicode(v) if k in ('type', 'eventtype', 'category'): returndict[u'category'] = toUnicode(v) # custom fields as a list/array if k in ('fields', 'details'): if type(v) is not dict: returndict[u'details'][u'message'] = v else: if len(v) > 0: for details_key, details_value in v.iteritems(): returndict[u'details'][details_key] = details_value # custom fields/details as a one off, not in an array # i.e. fields.something=value or details.something=value # move them to a dict for consistency in querying if k.startswith('fields.') or k.startswith('details.'): newName = k.replace('fields.', '') newName = newName.lower().replace('details.', '') # add field with a special case for shippers that # don't send details # in an array as int/floats/strings # we let them dictate the data type with field_datatype # convention if newName.endswith('_int'): returndict[u'details'][unicode(newName)] = int(v) elif newName.endswith('_float'): returndict[u'details'][unicode(newName)] = float(v) else: returndict[u'details'][unicode(newName)] = toUnicode(v) # nxlog windows log handling if 'Domain' in aDict.keys() and 'SourceModuleType' in aDict.keys(): # nxlog parses all windows event fields very well # copy all fields to details returndict[u'details'][k] = v if 'utctimestamp' not in returndict.keys(): # default in case we don't find a reasonable timestamp returndict['utctimestamp'] = toUTC(datetime.now()).isoformat() except Exception as e: logger.exception('Received exception while normalizing message: %r' % e) logger.error('Malformed message: %r' % aDict) return None return returndict
def keyMapping(aDict): """map common key/fields to a normalized structure, explicitly typed when possible to avoid schema changes for upsteam consumers Special accomodations made for logstash,nxlog, beaver, heka and CEF Some shippers attempt to conform to logstash-style @fieldname convention. This strips the leading at symbol since it breaks some elastic search libraries like elasticutils. """ returndict = dict() # uncomment to save the source event for debugging, or chain of custody/forensics # returndict['original']=aDict # set the timestamp when we received it, i.e. now returndict["receivedtimestamp"] = toUTC(datetime.now()).isoformat() returndict["mozdefhostname"] = options.mozdefhostname returndict["details"] = {} try: for k, v in aDict.items(): k = removeAt(k).lower() if k in ("message", "summary"): returndict["summary"] = toUnicode(v) if k in ("payload") and "summary" not in aDict: # special case for heka if it sends payload as well as a summary, keep both but move payload to the details section. returndict["summary"] = toUnicode(v) elif k in ("payload"): returndict["details"]["payload"] = toUnicode(v) if k in ("eventtime", "timestamp", "utctimestamp"): returndict["utctimestamp"] = toUTC(v).isoformat() returndict["timestamp"] = toUTC(v).isoformat() if k in ("hostname", "source_host", "host"): returndict["hostname"] = toUnicode(v) if k in ("tags"): if len(v) > 0: returndict["tags"] = v # nxlog keeps the severity name in syslogseverity,everyone else should use severity or level. if k in ("syslogseverity", "severity", "severityvalue", "level"): returndict["severity"] = toUnicode(v).upper() if k in ("facility", "syslogfacility", "source"): returndict["source"] = toUnicode(v) if k in ("pid", "processid"): returndict["processid"] = toUnicode(v) # nxlog sets sourcename to the processname (i.e. sshd), everyone else should call it process name or pname if k in ("pname", "processname", "sourcename"): returndict["processname"] = toUnicode(v) # the file, or source if k in ("path", "logger", "file"): returndict["eventsource"] = toUnicode(v) if k in ("type", "eventtype", "category"): returndict["category"] = toUnicode(v) # custom fields as a list/array if k in ("fields", "details"): if type(v) is not dict: returndict["details"]["message"] = v else: if len(v) > 0: for details_key, details_value in v.items(): returndict["details"][details_key] = details_value # custom fields/details as a one off, not in an array # i.e. fields.something=value or details.something=value # move them to a dict for consistency in querying if k.startswith("fields.") or k.startswith("details."): newName = k.replace("fields.", "") newName = newName.lower().replace("details.", "") # add field with a special case for shippers that # don't send details # in an array as int/floats/strings # we let them dictate the data type with field_datatype # convention if newName.endswith("_int"): returndict["details"][str(newName)] = int(v) elif newName.endswith("_float"): returndict["details"][str(newName)] = float(v) else: returndict["details"][str(newName)] = toUnicode(v) # nxlog windows log handling if "Domain" in aDict and "SourceModuleType" in aDict: # nxlog parses all windows event fields very well # copy all fields to details returndict["details"][k] = v if "utctimestamp" not in returndict: # default in case we don't find a reasonable timestamp returndict["utctimestamp"] = toUTC(datetime.now()).isoformat() if "type" not in returndict: # default replacement for old _type subcategory. # to preserve filtering capabilities returndict["type"] = "event" except Exception as e: logger.exception("Exception normalizing the message %r" % e) logger.error("Malformed message dict: %r" % aDict) return None return returndict
def keyMapping(aDict): '''map common key/fields to a normalized structure, explicitly typed when possible to avoid schema changes for upsteam consumers Special accomodations made for logstash,nxlog, beaver, heka and CEF Some shippers attempt to conform to logstash-style @fieldname convention. This strips the leading at symbol since it breaks some elastic search libraries like elasticutils. ''' returndict = dict() returndict['source'] = 'cloudtrail' returndict['details'] = {} returndict['category'] = 'cloudtrail' returndict['processid'] = str(os.getpid()) returndict['processname'] = sys.argv[0] returndict['severity'] = 'INFO' if 'sourceIPAddress' in aDict and 'eventName' in aDict and 'eventSource' in aDict: summary_str = "{0} performed {1} in {2}".format( aDict['sourceIPAddress'], aDict['eventName'], aDict['eventSource'] ) returndict['summary'] = summary_str if 'eventName' in aDict: # Uppercase first character aDict['eventName'] = aDict['eventName'][0].upper() + aDict['eventName'][1:] returndict['details']['eventVerb'] = CLOUDTRAIL_VERB_REGEX.findall(aDict['eventName'])[0] returndict['details']['eventReadOnly'] = (returndict['details']['eventVerb'] in ['Describe', 'Get', 'List']) # set the timestamp when we received it, i.e. now returndict['receivedtimestamp'] = toUTC(datetime.now()).isoformat() returndict['mozdefhostname'] = options.mozdefhostname try: for k, v in aDict.iteritems(): k = removeAt(k).lower() if k == 'sourceip': returndict[u'details']['sourceipaddress'] = v elif k == 'sourceipaddress': returndict[u'details']['sourceipaddress'] = v elif k in ('facility', 'source'): returndict[u'source'] = v elif k in ('eventsource'): returndict[u'hostname'] = v elif k in ('message', 'summary'): returndict[u'summary'] = toUnicode(v) elif k in ('payload') and 'summary' not in aDict: # special case for heka if it sends payload as well as a summary, keep both but move payload to the details section. returndict[u'summary'] = toUnicode(v) elif k in ('payload'): returndict[u'details']['payload'] = toUnicode(v) elif k in ('eventtime', 'timestamp', 'utctimestamp', 'date'): returndict[u'utctimestamp'] = toUTC(v).isoformat() returndict[u'timestamp'] = toUTC(v).isoformat() elif k in ('hostname', 'source_host', 'host'): returndict[u'hostname'] = toUnicode(v) elif k in ('tags'): if 'tags' not in returndict: returndict[u'tags'] = [] if type(v) == list: returndict[u'tags'] += v else: if len(v) > 0: returndict[u'tags'].append(v) # nxlog keeps the severity name in syslogseverity,everyone else should use severity or level. elif k in ('syslogseverity', 'severity', 'severityvalue', 'level', 'priority'): returndict[u'severity'] = toUnicode(v).upper() elif k in ('facility', 'syslogfacility'): returndict[u'facility'] = toUnicode(v) elif k in ('pid', 'processid'): returndict[u'processid'] = toUnicode(v) # nxlog sets sourcename to the processname (i.e. sshd), everyone else should call it process name or pname elif k in ('pname', 'processname', 'sourcename', 'program'): returndict[u'processname'] = toUnicode(v) # the file, or source elif k in ('path', 'logger', 'file'): returndict[u'eventsource'] = toUnicode(v) elif k in ('type', 'eventtype', 'category'): returndict[u'category'] = toUnicode(v) returndict[u'type'] = 'cloudtrail' # custom fields as a list/array elif k in ('fields', 'details'): if type(v) is not dict: returndict[u'details'][u'message'] = v else: if len(v) > 0: for details_key, details_value in v.iteritems(): returndict[u'details'][details_key] = details_value # custom fields/details as a one off, not in an array # i.e. fields.something=value or details.something=value # move them to a dict for consistency in querying elif k.startswith('fields.') or k.startswith('details.'): newName = k.replace('fields.', '') newName = newName.lower().replace('details.', '') # add a dict to hold the details if it doesn't exist if 'details' not in returndict: returndict[u'details'] = dict() # add field with a special case for shippers that # don't send details # in an array as int/floats/strings # we let them dictate the data type with field_datatype # convention if newName.endswith('_int'): returndict[u'details'][unicode(newName)] = int(v) elif newName.endswith('_float'): returndict[u'details'][unicode(newName)] = float(v) else: returndict[u'details'][unicode(newName)] = toUnicode(v) else: returndict[u'details'][k] = v if 'utctimestamp' not in returndict: # default in case we don't find a reasonable timestamp returndict['utctimestamp'] = toUTC(datetime.now()).isoformat() except Exception as e: logger.exception(e) logger.error('Malformed message: %r' % aDict) return returndict
def keyMapping(aDict): """map common key/fields to a normalized structure, explicitly typed when possible to avoid schema changes for upsteam consumers Special accomodations made for logstash,nxlog, beaver, heka and CEF Some shippers attempt to conform to logstash-style @fieldname convention. This strips the leading at symbol since it breaks some elastic search libraries like elasticutils. """ returndict = dict() returndict["source"] = "cloudtrail" returndict["details"] = {} returndict["category"] = "cloudtrail" returndict["processid"] = str(os.getpid()) returndict["processname"] = sys.argv[0] returndict["tags"] = [options.taskexchange] returndict["severity"] = "INFO" if "sourceIPAddress" in aDict and "eventName" in aDict and "eventSource" in aDict: summary_str = "{0} performed {1} in {2}".format( aDict["sourceIPAddress"], aDict["eventName"], aDict["eventSource"]) returndict["summary"] = summary_str if "eventName" in aDict: # Uppercase first character aDict["eventName"] = aDict["eventName"][0].upper( ) + aDict["eventName"][1:] returndict["details"]["eventVerb"] = CLOUDTRAIL_VERB_REGEX.findall( aDict["eventName"])[0] returndict["details"]["eventReadOnly"] = returndict["details"][ "eventVerb"] in ["Describe", "Get", "List"] # set the timestamp when we received it, i.e. now returndict["receivedtimestamp"] = toUTC(datetime.now()).isoformat() returndict["mozdefhostname"] = options.mozdefhostname try: for k, v in aDict.items(): k = removeAt(k).lower() if k == "sourceip": returndict["details"]["sourceipaddress"] = v elif k == "sourceipaddress": returndict["details"]["sourceipaddress"] = v elif k in ("facility", "source"): returndict["source"] = v elif k in ("eventsource"): returndict["hostname"] = v elif k in ("message", "summary"): returndict["summary"] = toUnicode(v) elif k in ("payload") and "summary" not in aDict: # special case for heka if it sends payload as well as a summary, keep both but move payload to the details section. returndict["summary"] = toUnicode(v) elif k in ("payload"): returndict["details"]["payload"] = toUnicode(v) elif k in ("eventtime", "timestamp", "utctimestamp", "date"): returndict["utctimestamp"] = toUTC(v).isoformat() returndict["timestamp"] = toUTC(v).isoformat() elif k in ("hostname", "source_host", "host"): returndict["hostname"] = toUnicode(v) elif k in ("tags"): if "tags" not in returndict: returndict["tags"] = [] if type(v) == list: returndict["tags"] += v else: if len(v) > 0: returndict["tags"].append(v) # nxlog keeps the severity name in syslogseverity,everyone else should use severity or level. elif k in ("syslogseverity", "severity", "severityvalue", "level", "priority"): returndict["severity"] = toUnicode(v).upper() elif k in ("facility", "syslogfacility"): returndict["facility"] = toUnicode(v) elif k in ("pid", "processid"): returndict["processid"] = toUnicode(v) # nxlog sets sourcename to the processname (i.e. sshd), everyone else should call it process name or pname elif k in ("pname", "processname", "sourcename", "program"): returndict["processname"] = toUnicode(v) # the file, or source elif k in ("path", "logger", "file"): returndict["eventsource"] = toUnicode(v) elif k in ("type", "eventtype", "category"): returndict["category"] = toUnicode(v) returndict["type"] = "cloudtrail" # custom fields as a list/array elif k in ("fields", "details"): if type(v) is not dict: returndict["details"]["message"] = v else: if len(v) > 0: for details_key, details_value in v.items(): returndict["details"][details_key] = details_value # custom fields/details as a one off, not in an array # i.e. fields.something=value or details.something=value # move them to a dict for consistency in querying elif k.startswith("fields.") or k.startswith("details."): newName = k.replace("fields.", "") newName = newName.lower().replace("details.", "") # add a dict to hold the details if it doesn't exist if "details" not in returndict: returndict["details"] = dict() # add field with a special case for shippers that # don't send details # in an array as int/floats/strings # we let them dictate the data type with field_datatype # convention if newName.endswith("_int"): returndict["details"][str(newName)] = int(v) elif newName.endswith("_float"): returndict["details"][str(newName)] = float(v) else: returndict["details"][str(newName)] = toUnicode(v) else: returndict["details"][k] = v if "utctimestamp" not in returndict: # default in case we don't find a reasonable timestamp returndict["utctimestamp"] = toUTC(datetime.now()).isoformat() except Exception as e: logger.exception(e) logger.error("Malformed message: %r" % aDict) return returndict