def get_remaining_quota(self): """ Returns the number of hashes that could be queried within this run """ url = f'https://www.virustotal.com/api/v3/users/{self.api_key}/overall_quotas' headers = { 'Accept': 'application/json', 'x-apikey': self.api_key } # Get the quotas, if response code != 200, return 0 so we don't query further response = requests.get(url, headers=headers) if response.status_code == 200: json_response = response.json() else: self.logger.warning('Error retrieving VT Quota (HTTP Status code: %d)', response.status_code) return 0 # Extract the hourly, daily and monthly remaining quotas remaining_hourly = get_value('data.api_requests_hourly.user.allowed', json_response, 0) - get_value('data.api_requests_hourly.user.used', json_response, 0) remaining_daily = get_value('data.api_requests_daily.user.allowed', json_response, 0) - get_value('data.api_requests_daily.user.used', json_response, 0) remaining_monthly = get_value('data.api_requests_monthly.user.allowed', json_response, 0) - get_value('data.api_requests_monthly.user.used', json_response, 0) self.logger.debug('Remaining quotas: hourly(%d) / daily(%d) / monthly(%d)', remaining_hourly, remaining_daily, remaining_monthly) # Get the smallest one and return it remaining_min = min(remaining_hourly, remaining_daily, remaining_monthly) return remaining_min
def get_iplists(self):  # pylint: disable=no-self-use
    """ Get all IP lists """
    ip_lists = {}

    # Get all IPs except from tor
    es_query = {
        'query': {
            'bool': {
                'must_not': [{
                    'match': {
                        'iplist.name': 'tor'
                    }
                }]
            }
        }
    }
    es_results = raw_search(es_query, index='redelk-iplist-*')

    if not es_results:
        return ip_lists

    for ip_doc in es_results['hits']['hits']:
        ip = get_value('_source.iplist.ip', ip_doc)  # pylint: disable=invalid-name
        iplist_name = get_value('_source.iplist.name', ip_doc)

        if iplist_name in ip_lists:
            # Already one IP found in this list, adding it
            ip_lists[iplist_name].append(ip)
        else:
            # First IP for this IP list, creating the array
            ip_lists[iplist_name] = [ip]

    return ip_lists
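# Illustrative shape of the dict returned by get_iplists() (names and
# addresses made up):
# {
#     'redteam': ['10.0.0.1', '10.0.0.2'],
#     'customer': ['192.0.2.10']
# }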
def test(self, hash_list): """ run the query and build the report (results) """ # Get the remaining quota for this run remaining_quota = self.get_remaining_quota() vt_results = {} # Query VT API for file hashes count = 0 for md5 in hash_list: if count < remaining_quota: # Within quota, let's check the file hash with VT vt_result = self.get_vt_file_results(md5) if vt_result is not None: if isinstance(vt_result, type({})) and 'data' in vt_result: # Get first submission date first_submitted_ts = get_value('data.attributes.first_submission_date', vt_result, None) try: first_submitted_date = datetime.fromtimestamp(first_submitted_ts).isoformat() # pylint: disable=broad-except except Exception: first_submitted_date = None last_analysis_ts = get_value('data.attributes.last_analysis_date', vt_result, None) try: last_analysis_date = datetime.fromtimestamp(last_analysis_ts).isoformat() # pylint: disable=broad-except except Exception: last_analysis_date = None # Found vt_results[md5] = { 'record': vt_result, 'result': 'newAlarm', 'first_submitted': first_submitted_date, 'last_seen': last_analysis_date } else: vt_results[md5] = { 'result': 'clean' } else: # 404 not found vt_results[md5] = { 'result': 'clean' } vt_results[md5] = vt_result else: # Quota reached, skip the check vt_results[md5] = { 'result': 'skipped, quota reached' } count += 1 return vt_results
def sync_iplist(self, iplist='redteam'):
    """ Sync data between ES iplist and config files """
    # Get data from config file iplist
    cfg_iplist = self.get_cfg_ips(iplist)

    # If the config file doesn't exist, skip the sync
    if cfg_iplist is None:
        return []

    # Get data from ES iplist
    query = f'iplist.name:{iplist}'
    es_iplist_docs = get_query(query, size=10000, index='redelk-*')

    # Check if config IP is in ES and source == config_file
    es_iplist = []
    for doc in es_iplist_docs:
        ip = get_value('_source.iplist.ip', doc)  # pylint: disable=invalid-name
        if ip:
            es_iplist.append((ip, doc))

    for ipc, comment in cfg_iplist:
        found = [item for item in es_iplist if ipc in item]
        if not found:
            self.logger.debug('IP not found in ES: %s', ipc)
            # If not, add it
            self.add_es_ip(ipc, iplist, comment)

    toadd = []
    for ipe, doc in es_iplist:
        # Check if ES IP is in config file
        found = [item for item in cfg_iplist if ipe in item]
        if not found:
            # If not, check if source == config_file
            if get_value('_source.iplist.source', doc) == 'config_file':
                # If yes, remove the IP from ES
                self.remove_es_ip(doc, iplist)
            else:
                # If not, add it to the config file
                comment = get_value('_source.iplist.comment', doc)
                if comment:
                    ipa = f'{ipe} # From ES -- {comment}'
                else:
                    ipa = f'{ipe} # From ES'
                toadd.append(ipa)

    self.add_cfg_ips(toadd, iplist)
    return toadd
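# Conceptual sync matrix implemented above:
#   IP in config file, not in ES                        -> add to ES
#   IP in ES with source 'config_file', not in config   -> remove from ES
#   IP in ES with any other source, not in config       -> append to config file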
def get_last_sync(self): """ Get greynoise data from ES if less than 1 day old """ es_query = { 'size': 1, 'sort': [{ '@timestamp': { 'order': 'desc' } }], 'query': { 'bool': { 'filter': [{ 'term': { 'iplist.name': 'tor' } }] } } } es_results = raw_search(es_query, index='redelk-*') self.logger.debug(es_results) # Return the latest hit or False if not found if es_results and len(es_results['hits']['hits']) > 0: dt_str = get_value('_source.@timestamp', es_results['hits']['hits'][0]) dtime = datetime.datetime.strptime(dt_str, '%Y-%m-%dT%H:%M:%S.%f') return dtime return datetime.datetime.fromtimestamp(0)
def enrich_beacon_data(self):
    """ Get all lines in rtops that have not been enriched yet (for CS) """
    es_query = f'implant.id:* AND c2.program: cobaltstrike AND NOT c2.log.type:implant_newimplant AND NOT tags:{info["submodule"]}'
    not_enriched_results = get_query(es_query, size=10000, index='rtops-*')

    # Create a dict grouped by implant ID
    implant_ids = {}
    for not_enriched in not_enriched_results:
        implant_id = get_value('_source.implant.id', not_enriched)
        if implant_id in implant_ids:
            implant_ids[implant_id].append(not_enriched)
        else:
            implant_ids[implant_id] = [not_enriched]

    hits = []
    # For each implant ID, get the initial beacon line
    for implant_id, implant_val in implant_ids.items():
        initial_beacon_doc = self.get_initial_beacon_doc(implant_id)

        # If no initial beacon line was found, skip this implant ID
        if not initial_beacon_doc:
            continue

        for doc in implant_val:
            # Fields to copy: host.*, implant.*, process.*, user.*
            res = self.copy_data_fields(initial_beacon_doc, doc, ['host', 'implant', 'user', 'process'])
            if res:
                hits.append(res)

    return hits
def get_remaining_quota(self): """ Returns the number of hashes that could be queried within this run """ url = 'https://api.xforce.ibmcloud.com/all-subscriptions/usage' headers = { 'Accept': 'application/json', 'Authorization': self.basic_auth } # Get the quotas, if response code != 200, return 0 so we don't query further response = requests.get(url, headers=headers) if response.status_code == 200: json_response = response.json() else: self.logger.warning( 'Error retrieving IBM X-Force Quota (HTTP Status code: %d)', response.status_code) return 0 remaining_quota = 0 # Extract the hourly, daily and monthly remaining quotas for result in json_response: # Only take the relevant results (usageData for 'api' type) if 'subscriptionType' in result and result[ 'subscriptionType'] == 'api' and 'usageData' in result: # Get the monthly quota (limit) entitlement = get_value('usageData.entitlement', result, 0) remaining_quota += int(entitlement) # Get the usage array (per cycle) usage = get_value('usageData.usage', result, []) # Find the current cycle and remove the current usage from that cycle from the remaining quota for usage_cycle in usage: cycle = get_value('cycle', usage_cycle, 0) if cycle == datetime.now().strftime('%Y-%m'): current_usage = get_value('usage', usage_cycle, 0) remaining_quota -= int(current_usage) self.logger.debug('Remaining quota (monthly): %d', remaining_quota) return remaining_quota
def get_greynoise_data(self, ip_address):
    """ Get the data from greynoise for the IP """
    # Malicious sample
    # {
    #   "ip": "222.187.238.136",
    #   "noise": true,
    #   "riot": false,
    #   "classification": "malicious",
    #   "name": "unknown",
    #   "link": "https://viz.greynoise.io/ip/222.187.238.136",
    #   "last_seen": "2021-06-23",
    #   "message": "Success"
    # }
    #
    # Benign sample
    # {
    #   "ip": "8.8.8.8",
    #   "noise": false,
    #   "riot": true,
    #   "classification": "benign",
    #   "name": "Google Public DNS",
    #   "link": "https://viz.greynoise.io/riot/8.8.8.8",
    #   "last_seen": "2021-06-23",
    #   "message": "Success"
    # }
    #
    # Unknown sample
    # {
    #   "ip": "123.123.115.117",
    #   "noise": false,
    #   "riot": false,
    #   "message": "IP not observed scanning the internet or contained in RIOT data set."
    # }
    try:
        gn_headers = {
            'key': self.api_key,
            'User-Agent': 'greynoise-redelk-enrichment'
        }
        gn_data = requests.get(f'{self.greynoise_url}{ip_address}', headers=gn_headers)
        json_result = gn_data.json()
        result = {
            'ip': ip_address,
            'noise': get_value('noise', json_result, False),
            'riot': get_value('riot', json_result, False),
            'classification': get_value('classification', json_result, 'unknown'),
            'name': get_value('name', json_result, 'unknown'),
            'link': get_value('link', json_result, 'unknown'),
            'last_seen': get_value('last_seen', json_result, 'unknown'),
            'message': get_value('message', json_result, 'unknown'),
            'query_timestamp': int(time())
        }
        return result
    except Exception as error:  # pylint: disable=broad-except
        self.logger.error('Error getting greynoise IP %s', ip_address)
        self.logger.exception(error)
        return False
def alarm_check(self, alarmed_ips):  # pylint: disable=no-self-use
    """ This check queries for IPs that aren't listed in any iplist* but do talk to c2* paths on redirectors """
    es_query = {
        'sort': [{'@timestamp': {'order': 'desc'}}],
        'query': {
            'bool': {
                'filter': [
                    {'match': {'tags': 'enrich_iplists'}}
                ],
                'must': {
                    'query_string': {
                        'fields': ['redir.backend.name'],
                        'query': 'c2*'
                    }
                },
                'must_not': [
                    {
                        'query_string': {
                            'fields': ['tags'],
                            'query': 'iplist_*'
                        }
                    },
                    {'match': {'tags': info['submodule']}}
                ]
            }
        }
    }
    res = raw_search(es_query, index='redirtraffic-*')
    if res is None:
        not_enriched_hits = []
    else:
        not_enriched_hits = res['hits']['hits']

    # Create a dict grouped by IP address (from source.ip)
    ips = {}
    for not_enriched in not_enriched_hits:
        ip = get_value('_source.source.ip', not_enriched)  # pylint: disable=invalid-name
        if ip in ips:
            ips[ip].append(not_enriched)
        else:
            ips[ip] = [not_enriched]

    hits = []

    # Now we check if the IPs have already been alarmed in the past timeframe defined in the config
    for ip, ip_val in ips.items():  # pylint: disable=invalid-name
        # Not alarmed yet, process it
        if ip not in alarmed_ips:
            hits += ip_val

    # Return the array of new IP documents to be alarmed
    return hits
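# Net effect of the query above: only documents already tagged by
# enrich_iplists, hitting a c2* backend, not matching any iplist_* tag, not
# yet tagged by this submodule, and whose source IP is absent from
# alarmed_ips are returned for alarming.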
def get_remaining_quota(self): """ Returns the number of hashes that could be queried within this run """ url = 'https://www.hybrid-analysis.com/api/v2/key/current' headers = { 'Accept': 'application/json', 'User-Agent': 'RedELK', 'api-key': self.api_key } # Get the quotas, if response code != 200, return 0 so we don't query further response = requests.get(url, headers=headers) if response.status_code != 200: self.logger.warning( 'Error retrieving Hybrid Analysis Quota (HTTP Status code: %d)', response.status_code) return 0 api_limits_json = response.headers.get('api-limits') api_limits = json.loads(api_limits_json) # First check if the limit has been reached limit_reached = get_value('limit_reached', api_limits, False) if limit_reached: return 0 # Extract the limits and usage limits_minute = get_value('limits.minute', api_limits, 0) limits_hour = get_value('limits.hour', api_limits, 0) used_minute = get_value('used.minute', api_limits, 0) used_hour = get_value('used.hour', api_limits, 0) remaining_minute = limits_minute - used_minute remaining_hour = limits_hour - used_hour self.logger.debug('Remaining quotas: hour(%d) / minute(%d)', remaining_hour, remaining_minute) # Return the remaining quota per minute return remaining_minute
def get_es_tor_exitnodes(self):  # pylint:disable=no-self-use
    """ Get the tor exit nodes present in ES """
    es_query = {'query': {'bool': {'filter': {'term': {'iplist.name': 'tor'}}}}}
    es_result = raw_search(es_query, index='redelk-*')

    if not es_result:
        return []

    iplist = []
    for ipdoc in es_result['hits']['hits']:
        ip = get_value('_source.iplist.ip', ipdoc)  # pylint: disable=invalid-name
        iplist.append(ip)

    return iplist
def send_alarm(self, alarm): """ Send the alarm notification """ tmsg = pymsteams.connectorcard( config.notifications['msteams']['webhook_url']) description = alarm['info']['description'] if len(alarm['groupby']) > 0: description += f'\n *Please note that the items below have been grouped by: {pprint(alarm["groupby"])}*' tmsg.text(description) tmsg.color('red') try: for hit in alarm['hits']['hits']: tcs = pymsteams.cardsection() tcs.disableMarkdown() i = 0 title = hit['_id'] while i < len(alarm['groupby']): val = get_value(f'_source.{alarm["groupby"][i]}', hit) if i == 0: title = val else: title = f'{title} / {val}' i += 1 tcs.activityTitle(f'Alarm on item: {title}') # tcs.activitySubtitle(alarm['info']['description']) for field in alarm['fields']: val = get_value(f'_source.{field}', hit) tcs.addFact(field, pprint(val)) tmsg.addSection(tcs) # pylint: disable=broad-except except Exception as error: self.logger.exception(error) tmsg.title( f'[{config.project_name}] Alarm from {alarm["info"]["name"]} [{alarm["hits"]["total"]} hits]' ) tmsg.send()
def test(self, hash_list): """ run the query and build the report (results) """ # Get the remaining quota for this run remaining_quota = self.get_remaining_quota() ha_results = {} # Query HA API for file hashes count = 0 for md5 in hash_list: if count < remaining_quota: # Within quota, let's check the file hash with HA ha_result = self.get_ha_file_results(md5) # No results, let's return it clean if len(ha_result) == 0: ha_results[md5] = {'result': 'clean'} elif is_json(ha_result): # Loop through the results to get the first analysis (submission) date first_analysis_time = datetime.utcnow() for result in ha_result: analysis_start_time = get_value( 'analysis_start_time', result, None) if analysis_start_time is not None: analysis_start_time_date = parser.isoparse( analysis_start_time).replace(tzinfo=None) first_analysis_time = first_analysis_time if first_analysis_time < analysis_start_time_date else analysis_start_time_date # Found ha_results[md5] = { 'record': ha_result, 'result': 'newAlarm', 'first_submitted': first_analysis_time.isoformat(), # TO-DO: loop through the submissions to get the time 'last_seen' } else: # some horrible error # implement logging here continue else: # Quota reached, skip the check ha_results[md5] = {'result': 'skipped, quota reached'} count += 1 return ha_results
def group_hits(self, iocs, already_alarmed, already_checked):
    """ Returns all hits grouped by md5 hash """
    md5_dict = {}
    md5_should_check = {}

    # Group all hits per md5 hash value
    for ioc in iocs:
        md5 = get_value('_source.file.hash.md5', ioc)

        if md5 in md5_dict:
            md5_dict[md5].append(ioc)
        else:
            md5_dict[md5] = [ioc]

        should_check = True
        # Check if the IOC has already been alarmed
        if md5 in already_alarmed:
            # Skip it
            should_check = False
            # Set the last checked date
            add_alarm_data(ioc, {}, info['submodule'], False)
            # Tag the doc as alarmed
            set_tags(info['submodule'], [ioc])

        # Check if the IOC has already been checked within 'interval'
        if md5 in already_checked:
            # Skip it for now
            should_check = False

        if md5 in md5_should_check:
            md5_should_check[md5] = should_check and md5_should_check[md5]
        else:
            md5_should_check[md5] = should_check

    for md5 in dict.copy(md5_dict):
        # If we should not check the hash, remove it from the list
        if md5 in md5_should_check and not md5_should_check[md5]:
            self.logger.debug('[%s] md5 hash already checked within interval or already alarmed previously, skipping', md5)
            del md5_dict[md5]

    return md5_dict
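# The should-check flags combine with a logical AND across all IOC docs for
# the same md5: if any doc marks the hash as already alarmed or already
# checked within the interval, the whole md5 group is dropped from md5_dict.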
def get_alarmed_ips(self):  # pylint: disable=no-self-use
    """ Returns all previous IPs that have been alarmed already """
    es_query = {
        'sort': [{
            '@timestamp': {
                'order': 'desc'
            }
        }],
        'query': {
            'bool': {
                'filter': [{
                    'range': {
                        '@timestamp': {
                            'gte': 'now-1y'
                        }
                    }
                }, {
                    'match': {
                        'tags': info['submodule']
                    }
                }]
            }
        }
    }
    res = raw_search(es_query, index='redirtraffic-*')
    if res is None:
        alarmed_hits = []
    else:
        alarmed_hits = res['hits']['hits']

    # Create a dict grouped by IP address (from source.ip)
    ips = {}
    for alarmed_hit in alarmed_hits:
        ip = get_value('_source.source.ip', alarmed_hit)  # pylint: disable=invalid-name
        if ip in ips:
            ips[ip].append(alarmed_hit)
        else:
            ips[ip] = [alarmed_hit]

    return ips
def enrich_tor(self, iplist):  # pylint:disable=no-self-use
    """
    Get all lines in redirtraffic that have not been enriched with 'enrich_iplist' or 'enrich_tor'.
    Only consider documents that precede the last run time of enrich_iplists (to avoid a race condition).
    """
    iplist_lastrun = get_last_run('enrich_iplists')
    query = {
        'sort': [{
            '@timestamp': {
                'order': 'desc'
            }
        }],
        'query': {
            'bool': {
                'filter': [{
                    'range': {
                        '@timestamp': {
                            'lte': iplist_lastrun.isoformat()
                        }
                    }
                }],
                'must_not': [{
                    'match': {
                        'tags': info['submodule']
                    }
                }]
            }
        }
    }
    res = raw_search(query, index='redirtraffic-*')
    if res is None:
        not_enriched = []
    else:
        not_enriched = res['hits']['hits']

    # For each document, check if its source IP is in the tor exit node data
    hits = []
    for not_e in not_enriched:
        ip = get_value('_source.source.ip', not_e)  # pylint: disable=invalid-name
        if ip in iplist:
            hits.append(not_e)

    return hits
def test(self, hash_list): """ run the query and build the report (results) """ self.logger.debug('Checking IOCs on IBM X-Force: %s', hash_list) # Get the remaining quota for this run remaining_quota = self.get_remaining_quota() ibm_results = {} # Query VT API for file hashes count = 0 for md5 in hash_list: if count < remaining_quota: # Within quota, let's check the file hash with VT ibm_result = self.get_ibm_xforce_file_results(md5) if ibm_result is not None: if isinstance(ibm_result, type( {})) and 'malware' in ibm_result: # Get first submission date first_submitted_date = get_value( 'malware.created', ibm_results, None) # Found and marked as malware ibm_results[md5] = { 'record': ibm_result, 'result': 'newAlarm', 'first_submitted': first_submitted_date, } else: ibm_results[md5] = {'result': 'clean'} else: # 404 not found ibm_results[md5] = {'result': 'clean'} else: # Quota reached, skip the check ibm_results[md5] = {'result': 'skipped, quota reached'} count += 1 return ibm_results
def send_alarm(self, alarm): """ Send the alarm """ # Read the RedELK logo from file and base64 encode it with open(f'{FILEDIR}/redelk_white.png', 'rb') as logo_file: redelk_logo_b64 = base64.b64encode(logo_file.read()).decode('utf-8') mail = f''' <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> <html xmlns="http://www.w3.org/1999/xhtml"> <head> <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" /> <title>Alarm from RedELK</title> <meta name="viewport" content="width=device-width, initial-scale=1.0"/> <style type="text/css"> #normal {{ font-family: Tahoma, Geneva, sans-serif; font-size: 16px; line-height: 24px; }} </style> </head> <body style="margin: 0; padding: 0;"> <table align="center" cellpadding="0" cellspacing="0" width="800" style="border-collapse: collapse;" style="max-width:800px;"> <tr> <td bgcolor="#212121" rowspan=2 width="120px" style="padding: 30px 30px 30px 30px; text-align:center;"> <img height="60px" src="data:image/png;base64,{redelk_logo_b64}" alt="img" /> </td> <td bgcolor="#212121" height="30px" style="color: #FAFAFA; font-family: Arial, sans-serif; font-size: 24px; padding: 30px 30px 0px 10px;"> RedELK alarm: <em>{alarm["info"]["name"]}</em> </td> </tr> <tr> <td bgcolor="#212121" height="20px" style="color: #FAFAFA; font-family: Arial, sans-serif; font-size: 16px; line-height: 20px; padding: 20px 30px 30px 10px;"> Project: <em>{project_name}</em><br/>Total hits: <em>{alarm["hits"]["total"]}</em> </td> </tr> <tr> <td colspan=2 style="color: #153643; font-family: Arial, sans-serif; font-size: 16px; line-height: 20px; padding: 0px 30px 0px 10px;"> <p>{alarm["info"]["description"]}</p> </td> </tr> ''' subject = f'Alarm from {alarm["info"]["name"]} [{alarm["hits"]["total"]} hits]' if len(alarm['groupby']) > 0: mail += f''' <tr> <td colspan=2 style="color: #153643; font-family: Arial, sans-serif; font-size: 12px; line-height: 16px; padding: 0px 15px 0px 15px;"> <p>Please note that the items below have been grouped by: {pprint(alarm["groupby"])}</p> </td> </tr> ''' try: for hit in alarm['hits']['hits']: index = 0 title = hit['_id'] while index < len(alarm['groupby']): val = get_value(f'_source.{alarm["groupby"][index]}', hit) if index == 0: title = val else: title = f'{title} / {val}' index += 1 mail += f''' <tr> <td bgcolor="#323232" colspan=2 style="color: #FAFAFA; font-family: Arial, sans-serif; font-size: 16px; line-height: 20px; padding: 10px 10px 10px 10px; text-align:center;"> <b>{title}</b> </td> </tr> ''' row = 0 for field in alarm['fields']: bgcolor = '#FAFAFA' if row % 2 == 0 else '#F1F1F1' val = get_value(f'_source.{field}', hit) value = json2html.convert(json=val) mail += f''' <tr bgcolor="{bgcolor}" style="color: #153643; font-family: Arial, sans-serif; font-size: 12px; line-height: 16px;"> <td style="padding: 10px 10px 10px 10px;"><b>{field}</b></td> <td style="padding: 10px 10px 10px 10px; white-space:pre-wrap; word-wrap:break-word">{value}</td> </tr> ''' row += 1 mail += '<tr><td colspan=2 style="padding: 15px;"> </td></tr>' mail += '</table>\n</body>\n</html>' except Exception as error: # pylint: disable=broad-except self.logger.error('Error sending email: %s', error) self.logger.exception(error) mail += '</body></html>\n' self.send_mail(notifications['email']['to'], mail, subject)
def enrich_greynoise(self):
    """
    Get all lines in redirtraffic that have not been enriched with 'enrich_greynoise'.
    Only consider documents that precede the last run time of enrich_iplists (to avoid a race condition).
    """
    iplist_lastrun = get_last_run('enrich_iplists')
    es_query = {
        'sort': [{'@timestamp': {'order': 'desc'}}],
        'query': {
            'bool': {
                'filter': [
                    {
                        'range': {
                            '@timestamp': {
                                'lte': iplist_lastrun.isoformat()
                            }
                        }
                    }
                ],
                'must_not': [
                    {'match': {'tags': info['submodule']}}
                ]
            }
        }
    }
    es_result = raw_search(es_query, index='redirtraffic-*')
    if es_result is None:
        not_enriched_results = []
    else:
        not_enriched_results = es_result['hits']['hits']

    # Create a dict grouped by IP address (from source.ip)
    ips = {}
    for not_enriched in not_enriched_results:
        ip = get_value('_source.source.ip', not_enriched)  # pylint: disable=invalid-name
        if ip in ips:
            ips[ip].append(not_enriched)
        else:
            ips[ip] = [not_enriched]

    hits = []
    # For each IP, get the greynoise data
    for ip, ip_val in ips.items():  # pylint: disable=invalid-name
        # If no IP, skip it
        if not ip:
            continue

        # Reuse recent data from ES if it is still within the configured interval
        last_es_data = self.get_last_es_data(ip)
        if not last_es_data:
            greynoise_data = self.get_greynoise_data(ip)
        else:
            greynoise_data = get_value('_source.source.greynoise', last_es_data)

        # If no greynoise data was found, skip the IP
        if not greynoise_data:
            continue

        for doc in ip_val:
            # Fields to copy: greynoise.*
            es_result = self.add_greynoise_data(doc, greynoise_data)
            if es_result:
                hits.append(es_result)

    return hits