def keyMapping(aDict): '''map common key/fields to a normalized structure, explicitly typed when possible to avoid schema changes for upsteam consumers Special accomodations made for logstash,nxlog, beaver, heka and CEF Some shippers attempt to conform to logstash-style @fieldname convention. This strips the leading at symbol since it breaks some elastic search libraries like elasticutils. ''' returndict = dict() returndict['source'] = 'cloudtrail' returndict['details'] = {} returndict['category'] = 'cloudtrail' returndict['processid'] = str(os.getpid()) returndict['processname'] = sys.argv[0] returndict['severity'] = 'INFO' if 'sourceIPAddress' in aDict and 'eventName' in aDict and 'eventSource' in aDict: summary_str = "{0} performed {1} in {2}".format( aDict['sourceIPAddress'], aDict['eventName'], aDict['eventSource'] ) returndict['summary'] = summary_str if 'eventName' in aDict: # Uppercase first character aDict['eventName'] = aDict['eventName'][0].upper() + aDict['eventName'][1:] returndict['details']['eventVerb'] = CLOUDTRAIL_VERB_REGEX.findall(aDict['eventName'])[0] returndict['details']['eventReadOnly'] = (returndict['details']['eventVerb'] in ['Describe', 'Get', 'List']) # set the timestamp when we received it, i.e. now returndict['receivedtimestamp'] = toUTC(datetime.now()).isoformat() returndict['mozdefhostname'] = options.mozdefhostname try: for k, v in aDict.iteritems(): k = removeAt(k).lower() if k == 'sourceip': returndict[u'details']['sourceipaddress'] = v elif k == 'sourceipaddress': returndict[u'details']['sourceipaddress'] = v elif k == 'facility': returndict[u'source'] = v elif k in ('eventsource'): returndict[u'hostname'] = v elif k in ('message', 'summary'): returndict[u'summary'] = toUnicode(v) elif k in ('payload') and 'summary' not in aDict.keys(): # special case for heka if it sends payload as well as a summary, keep both but move payload to the details section. returndict[u'summary'] = toUnicode(v) elif k in ('payload'): returndict[u'details']['payload'] = toUnicode(v) elif k in ('eventtime', 'timestamp', 'utctimestamp', 'date'): returndict[u'utctimestamp'] = toUTC(v).isoformat() returndict[u'timestamp'] = toUTC(v).isoformat() elif k in ('hostname', 'source_host', 'host'): returndict[u'hostname'] = toUnicode(v) elif k in ('tags'): if 'tags' not in returndict.keys(): returndict[u'tags'] = [] if type(v) == list: returndict[u'tags'] += v else: if len(v) > 0: returndict[u'tags'].append(v) # nxlog keeps the severity name in syslogseverity,everyone else should use severity or level. elif k in ('syslogseverity', 'severity', 'severityvalue', 'level', 'priority'): returndict[u'severity'] = toUnicode(v).upper() elif k in ('facility', 'syslogfacility'): returndict[u'facility'] = toUnicode(v) elif k in ('pid', 'processid'): returndict[u'processid'] = toUnicode(v) # nxlog sets sourcename to the processname (i.e. sshd), everyone else should call it process name or pname elif k in ('pname', 'processname', 'sourcename', 'program'): returndict[u'processname'] = toUnicode(v) # the file, or source elif k in ('path', 'logger', 'file'): returndict[u'eventsource'] = toUnicode(v) elif k in ('type', 'eventtype', 'category'): returndict[u'category'] = toUnicode(v) # custom fields as a list/array elif k in ('fields', 'details'): if type(v) is not dict: returndict[u'details'][u'message'] = v else: if len(v) > 0: for details_key, details_value in v.iteritems(): returndict[u'details'][details_key] = details_value # custom fields/details as a one off, not in an array # i.e. fields.something=value or details.something=value # move them to a dict for consistency in querying elif k.startswith('fields.') or k.startswith('details.'): newName = k.replace('fields.', '') newName = newName.lower().replace('details.', '') # add a dict to hold the details if it doesn't exist if 'details' not in returndict.keys(): returndict[u'details'] = dict() # add field with a special case for shippers that # don't send details # in an array as int/floats/strings # we let them dictate the data type with field_datatype # convention if newName.endswith('_int'): returndict[u'details'][unicode(newName)] = int(v) elif newName.endswith('_float'): returndict[u'details'][unicode(newName)] = float(v) else: returndict[u'details'][unicode(newName)] = toUnicode(v) else: returndict[u'details'][k] = v if 'utctimestamp' not in returndict.keys(): # default in case we don't find a reasonable timestamp returndict['utctimestamp'] = toUTC(datetime.now()).isoformat() except Exception as e: logger.exception(e) logger.error('Malformed message: %r' % aDict) return returndict
def keyMapping(aDict): '''map common key/fields to a normalized structure, explicitly typed when possible to avoid schema changes for upsteam consumers Special accomodations made for logstash,nxlog, beaver, heka and CEF Some shippers attempt to conform to logstash-style @fieldname convention. This strips the leading at symbol since it breaks some elastic search libraries like elasticutils. ''' returndict = dict() # uncomment to save the source event for debugging, or chain of custody/forensics # returndict['original']=aDict # set the timestamp when we received it, i.e. now returndict['receivedtimestamp'] = toUTC(datetime.now()).isoformat() returndict['mozdefhostname'] = options.mozdefhostname returndict['details'] = {} try: for k, v in aDict.iteritems(): k = removeAt(k).lower() if k in ('message', 'summary'): returndict[u'summary'] = toUnicode(v) if k in ('payload') and 'summary' not in aDict.keys(): # special case for heka if it sends payload as well as a summary, keep both but move payload to the details section. returndict[u'summary'] = toUnicode(v) elif k in ('payload'): returndict[u'details']['payload'] = toUnicode(v) if k in ('eventtime', 'timestamp', 'utctimestamp'): returndict[u'utctimestamp'] = toUTC(v).isoformat() returndict[u'timestamp'] = toUTC(v).isoformat() if k in ('hostname', 'source_host', 'host'): returndict[u'hostname'] = toUnicode(v) if k in ('tags'): if len(v) > 0: returndict[u'tags'] = v # nxlog keeps the severity name in syslogseverity,everyone else should use severity or level. if k in ('syslogseverity', 'severity', 'severityvalue', 'level'): returndict[u'severity'] = toUnicode(v).upper() if k in ('facility', 'syslogfacility'): returndict[u'facility'] = toUnicode(v) if k in ('pid', 'processid'): returndict[u'processid'] = toUnicode(v) # nxlog sets sourcename to the processname (i.e. sshd), everyone else should call it process name or pname if k in ('pname', 'processname', 'sourcename'): returndict[u'processname'] = toUnicode(v) # the file, or source if k in ('path', 'logger', 'file'): returndict[u'eventsource'] = toUnicode(v) if k in ('type', 'eventtype', 'category'): returndict[u'category'] = toUnicode(v) # custom fields as a list/array if k in ('fields', 'details'): if type(v) is not dict: returndict[u'details'][u'message'] = v else: if len(v) > 0: for details_key, details_value in v.iteritems(): returndict[u'details'][details_key] = details_value # custom fields/details as a one off, not in an array # i.e. fields.something=value or details.something=value # move them to a dict for consistency in querying if k.startswith('fields.') or k.startswith('details.'): newName = k.replace('fields.', '') newName = newName.lower().replace('details.', '') # add field with a special case for shippers that # don't send details # in an array as int/floats/strings # we let them dictate the data type with field_datatype # convention if newName.endswith('_int'): returndict[u'details'][unicode(newName)] = int(v) elif newName.endswith('_float'): returndict[u'details'][unicode(newName)] = float(v) else: returndict[u'details'][unicode(newName)] = toUnicode(v) # nxlog windows log handling if 'Domain' in aDict.keys() and 'SourceModuleType' in aDict.keys(): # nxlog parses all windows event fields very well # copy all fields to details returndict[u'details'][k] = v if 'utctimestamp' not in returndict.keys(): # default in case we don't find a reasonable timestamp returndict['utctimestamp'] = toUTC(datetime.now()).isoformat() except Exception as e: logger.exception('Received exception while normalizing message: %r' % e) logger.error('Malformed message: %r' % aDict) return None return returndict