def get_redirtraffic(self):
    """Fetch all redirtraffic docs up to self.now that this submodule has not tagged yet.

    Returns a (possibly empty) list of raw ES hit documents.
    """
    search_body = {
        'sort': [{'@timestamp': {'order': 'desc'}}],
        'query': {
            'bool': {
                # Only traffic up to 'now' — avoids racing documents still being ingested
                'filter': [{'range': {'@timestamp': {'lte': self.now.isoformat()}}}],
                # Skip docs already processed by a previous run of this submodule
                'must_not': [{'match': {'tags': info['submodule']}}]
            }
        }
    }
    result = raw_search(search_body, index='redirtraffic-*')
    self.logger.debug(result)
    if result is None:
        return []
    return result['hits']['hits']
def get_last_sync(self):
    """Return the @timestamp of the most recent 'tor' iplist document in ES.

    Falls back to the Unix epoch (1970-01-01) when no tor iplist entry exists,
    so callers can always compare against a datetime.

    NOTE(review): the previous docstring said "greynoise data ... less than 1 day
    old", but the query actually filters on iplist.name == 'tor'.
    NOTE(review): strptime assumes the stored format is exactly
    '%Y-%m-%dT%H:%M:%S.%f' (no timezone suffix) — confirm against the indexer.
    """
    es_query = { 'size': 1, 'sort': [{ '@timestamp': { 'order': 'desc' } }], 'query': { 'bool': { 'filter': [{ 'term': { 'iplist.name': 'tor' } }] } } }
    es_results = raw_search(es_query, index='redelk-*')
    self.logger.debug(es_results)
    # Parse and return the latest hit's timestamp, or the epoch if none found
    if es_results and len(es_results['hits']['hits']) > 0:
        dt_str = get_value('_source.@timestamp', es_results['hits']['hits'][0])
        dtime = datetime.datetime.strptime(dt_str, '%Y-%m-%dT%H:%M:%S.%f')
        return dtime
    return datetime.datetime.fromtimestamp(0)
def get_iplists(self):  # pylint: disable=no-self-use
    """Collect every IP list stored in ES except 'tor', keyed by list name.

    Returns a dict mapping iplist name -> list of IP strings; empty dict when
    the search yields nothing.
    """
    es_query = {
        'query': {
            'bool': {
                # tor nodes are handled separately by the tor enrichment
                'must_not': [{'match': {'iplist.name': 'tor'}}]
            }
        }
    }
    es_results = raw_search(es_query, index='redelk-iplist-*')
    if not es_results:
        return {}
    lists = {}
    for doc in es_results['hits']['hits']:
        list_name = get_value('_source.iplist.name', doc)
        address = get_value('_source.iplist.ip', doc)
        # setdefault creates the bucket the first time a list name is seen
        lists.setdefault(list_name, []).append(address)
    return lists
def alarm_check(self, alarmed_ips):  # pylint: disable=no-self-use
    """Find traffic hitting c2* backends from IPs that are on no iplist.

    Queries redirtraffic for iplist-enriched docs whose backend matches 'c2*',
    excluding docs already tagged by an iplist or by this submodule. Docs are
    grouped per source IP; any IP already present in *alarmed_ips* is dropped.

    Returns the flat list of new hit documents to be alarmed.
    """
    es_query = {
        'sort': [{'@timestamp': {'order': 'desc'}}],
        'query': {
            'bool': {
                # Only consider docs the iplist enrichment already processed
                'filter': [{'match': {'tags': 'enrich_iplists'}}],
                'must': {
                    'query_string': {
                        'fields': ['redir.backend.name'],
                        'query': 'c2*'
                    }
                },
                'must_not': [
                    # Known IPs (any iplist_* tag) are not suspicious
                    {'query_string': {'fields': ['tags'], 'query': 'iplist_*'}},
                    # Skip docs this submodule already handled
                    {'match': {'tags': info['submodule']}}
                ]
            }
        }
    }
    result = raw_search(es_query, index='redirtraffic-*')
    candidates = [] if result is None else result['hits']['hits']
    # Bucket the candidate hits per source IP
    by_ip = {}
    for doc in candidates:
        by_ip.setdefault(get_value('_source.source.ip', doc), []).append(doc)
    # Keep only documents whose source IP has not been alarmed before
    new_hits = []
    for addr, addr_docs in by_ip.items():
        if addr not in alarmed_ips:
            new_hits += addr_docs
    return new_hits
def get_es_tor_exitnodes(self):  # pylint:disable=no-self-use
    """Return the list of tor exit node IPs currently stored in ES."""
    es_query = {'query': {'bool': {'filter': {'term': {'iplist.name': 'tor'}}}}}
    result = raw_search(es_query, index='redelk-*')
    if not result:
        return []
    # One IP per matching iplist document
    return [get_value('_source.iplist.ip', doc) for doc in result['hits']['hits']]
def enrich_tor(self, iplist):  # pylint:disable=no-self-use
    """Select redirtraffic docs whose source IP is a known tor exit node.

    Only considers docs not yet tagged by this submodule and timestamped
    before the last 'enrich_iplists' run (avoids a race with that module).
    *iplist* is the collection of tor exit node IPs to match against.
    """
    iplist_lastrun = get_last_run('enrich_iplists')
    es_query = {
        'sort': [{'@timestamp': {'order': 'desc'}}],
        'query': {
            'bool': {
                # Stay behind the iplist enrichment to avoid double-processing
                'filter': [{'range': {'@timestamp': {'lte': iplist_lastrun.isoformat()}}}],
                'must_not': [{'match': {'tags': info['submodule']}}]
            }
        }
    }
    result = raw_search(es_query, index='redirtraffic-*')
    candidates = result['hits']['hits'] if result is not None else []
    # Keep only the documents whose source IP appears in the tor exit node list
    return [doc for doc in candidates
            if get_value('_source.source.ip', doc) in iplist]
def get_alarmed_ips(self):  # pylint: disable=no-self-use
    """Return all IPs alarmed by this submodule within the last year.

    Result is a dict mapping source IP -> list of the matching hit documents.
    """
    es_query = {
        'sort': [{'@timestamp': {'order': 'desc'}}],
        'query': {
            'bool': {
                'filter': [
                    # Only look back one year
                    {'range': {'@timestamp': {'gte': 'now-1y'}}},
                    # Only docs this submodule tagged (i.e. already alarmed)
                    {'match': {'tags': info['submodule']}}
                ]
            }
        }
    }
    result = raw_search(es_query, index='redirtraffic-*')
    previous_hits = [] if result is None else result['hits']['hits']
    # Group the alarmed documents by their source IP
    by_ip = {}
    for doc in previous_hits:
        by_ip.setdefault(get_value('_source.source.ip', doc), []).append(doc)
    return by_ip
def get_last_es_data(self, ip_address):
    """Return the newest greynoise-enriched doc for *ip_address* within the cache window.

    The window is the last self.cache seconds. Returns the raw ES hit, or
    False when no cached enrichment exists.
    """
    es_query = {
        'size': 1,
        'sort': [{'@timestamp': {'order': 'desc'}}],
        'query': {
            'bool': {
                'filter': [
                    # Enrichment must be fresher than the cache TTL
                    {'range': {
                        'source.greynoise.query_timestamp': {
                            'gte': f'now-{self.cache}s',
                            'lte': 'now'
                        }
                    }},
                    {'term': {'tags': 'enrich_greynoise'}},
                    {'term': {'host.ip': ip_address}}
                ]
            }
        }
    }
    result = raw_search(es_query, index='redirtraffic-*')
    self.logger.debug(result)
    # Hand back the latest hit, or False when nothing cached was found
    if result and len(result['hits']['hits']) > 0:
        return result['hits']['hits'][0]
    return False
def alarm_check(self):
    """Alarm on file IOCs whose md5 hash is known to public hash-lookup providers.

    Flow:
      1. Fetch all 'file' IOCs not yet tagged 'alarm_filehash'.
      2. Aggregate md5s already checked within the last self.interval seconds
         and md5s already alarmed, so they are not re-submitted.
      3. Send the remaining unique md5s to the providers and build the report.

    Returns the report dict produced by self.build_report().
    """
    es_query = 'c2.log.type:ioc AND NOT tags:alarm_filehash AND ioc.type:file'
    alarmed_md5_q = {
        'aggs': {
            # md5s whose last check falls inside the re-check interval
            'interval_filter': {
                'filter': {
                    'range': {
                        'alarm.last_checked': {
                            'gte': f'now-{self.interval}s',
                            'lt': 'now'
                        }
                    }
                },
                'aggs': {
                    'md5_interval': {'terms': {'field': 'file.hash.md5'}}
                }
            },
            # md5s that already triggered an alarm at some point
            'alarmed_filter': {
                'filter': {'terms': {'tags': ['alarm_filehash']}},
                'aggs': {
                    'md5_alarmed': {'terms': {'field': 'file.hash.md5'}}
                }
            }
        }
    }
    self.logger.debug('Running query %s', es_query)
    # First, get all IOCs of type 'file' that have not been alarmed yet
    iocs = get_query(es_query, 10000, index='rtops-*')
    self.logger.debug('found ioc: %s', iocs)
    # Then aggregate md5s alarmed/checked within the last 'interval' time
    self.logger.debug('Running query %s', alarmed_md5_q)
    already_alarmed_result = raw_search(alarmed_md5_q, index='rtops-*')
    already_checked = []
    already_alarmed = []
    if already_alarmed_result:
        aggs = already_alarmed_result['aggregations']
        self.logger.debug(aggs)
        # md5 hashes that have been checked within the 'interval'
        for hit in aggs['interval_filter']['md5_interval']['buckets']:
            already_checked.append(hit['key'])
        # md5 hashes that have been alarmed previously
        for hit in aggs['alarmed_filter']['md5_alarmed']['buckets']:
            already_alarmed.append(hit['key'])
    # Group all hits per md5 hash; keys are the unique md5s still to test
    md5_dict = self.group_hits(iocs, already_alarmed, already_checked)
    md5_list = list(md5_dict)
    self.logger.debug('md5 hashes to check: %s', md5_list)
    # Run the checks against the providers
    check_results = self.check_hashes(md5_list)
    # Get the alarmed hashes with their corresponding mutations
    alarmed_hashes = self.get_mutations(check_results)
    return self.build_report(md5_dict, alarmed_hashes)
def enrich_greynoise(self):
    """Enrich not-yet-processed redirtraffic docs with greynoise data, per source IP.

    Only docs timestamped before the last 'enrich_iplists' run and not yet
    tagged by this submodule are considered (avoids a race with that module).
    Cached greynoise data from ES is reused when fresh; otherwise the
    greynoise API is queried. Returns the list of updated ES documents.
    """
    iplist_lastrun = get_last_run('enrich_iplists')
    es_query = {
        'sort': [{'@timestamp': {'order': 'desc'}}],
        'query': {
            'bool': {
                # Stay behind the iplist enrichment to avoid double-processing
                'filter': [{'range': {'@timestamp': {'lte': iplist_lastrun.isoformat()}}}],
                'must_not': [{'match': {'tags': info['submodule']}}]
            }
        }
    }
    result = raw_search(es_query, index='redirtraffic-*')
    candidates = result['hits']['hits'] if result is not None else []
    # Bucket the candidate documents per source IP
    by_ip = {}
    for doc in candidates:
        by_ip.setdefault(get_value('_source.source.ip', doc), []).append(doc)
    enriched = []
    for addr, addr_docs in by_ip.items():
        # Documents without a source IP cannot be enriched
        if not addr:
            continue
        # Prefer cached enrichment from redirtraffic when within the cache window
        cached = self.get_last_es_data(addr)
        if cached:
            greynoise_data = get_value('_source.source.greynoise', cached)
        else:
            greynoise_data = self.get_greynoise_data(addr)
        # No greynoise data at all -> skip this IP entirely
        if not greynoise_data:
            continue
        for doc in addr_docs:
            # Fields to copy: greynoise.*
            updated = self.add_greynoise_data(doc, greynoise_data)
            if updated:
                enriched.append(updated)
    return enriched