def get_malwareurl_list():
    """Build the list of malware URLs published by the MalShare feed.

    Every line of the raw feed is passed through clean_url(); the host part
    is then extracted and resolved, and only URLs whose host resolves to an
    IP address are kept.

    Returns:
    - url_list: (type: MalwareUrl list) list of malware URLs. An empty list
      is returned when the feed answers with a non-200 status or when any
      error is raised while fetching/processing it.
    """
    try:
        query = {'action': 'getsourcesraw', 'api_key': API_KEY}
        headers = {'User-agent': BASECONFIG.user_agent}

        LOGGING.info('Fetching latest MalShare list...')

        response = requests.get(
            'https://malshare.com/api.php', params=query, headers=headers)

        # Anything other than 200 is reported and yields an empty result.
        if response.status_code != 200:
            LOGGING.error(
                'Problem connecting to MalShare. Status code:{0}. Please try again later.'
                .format(response.status_code))
            return []

        LOGGING.info('Processing MalShare list...')

        found = []

        for raw_line in response.text.split('\n'):
            url = clean_url(raw_line.strip())

            # Nothing usable came back from clean_url (None or empty).
            if not url:
                continue

            host_name = get_host_from_url(url)
            ip_addr = resolve_dns(host_name)

            # Hosts that do not resolve are dropped.
            if not ip_addr:
                continue

            LOGGING.info('Discovered malicious URL: {0}'.format(url))
            found.append(MalwareUrl(host_name, ip_addr, url, 'Malshare'))

        return found

    except requests.exceptions.ConnectionError as e:
        LOGGING.warning('Problem connecting to Malshare. Error: {0}'.format(e))

    except Exception as e:
        LOGGING.warning('Problem connecting to Malshare. Aborting task.')
        LOGGING.exception(sys.exc_info())
        LOGGING.exception(type(e))
        LOGGING.exception(e.args)
        LOGGING.exception(e)

    return []
def get_malwareurl_list():
    """Produce a list of malware URLs from the OTX feed.

    Starts from the URL list returned by get_otx_data() and extends it with
    the URLs that get_urls_for_ip() reports for:
    - every distinct IP address the OTX domains resolve to, and
    - every IP address listed directly in the OTX data.

    Returns:
    - url_list: (type: MalwareUrl list) list of malware URLs. An empty list
      is returned when any error is raised during processing.
    """
    try:
        ip_list, domain_list, url_list = get_otx_data()

        if domain_list:
            # Several domains may resolve to the same address; query each
            # resolved IP only once.
            seen_ips = set()

            for domain in domain_list:
                ip_addr = resolve_dns(domain)

                if not ip_addr or ip_addr in seen_ips:
                    continue

                seen_ips.add(ip_addr)

                domain_urls = get_urls_for_ip(ip_addr, NAME)

                if domain_urls:
                    url_list.extend(domain_urls)

        else:
            LOGGING.warning('OTX URL list (via domain) is empty.')

        if ip_list:
            for ip_addr in ip_list:
                ip_results = get_urls_for_ip(ip_addr, NAME)

                if ip_results:
                    url_list.extend(ip_results)

        else:
            LOGGING.warning('OTX URL list (via IP) is empty.')

        return url_list

    except Exception as e:
        # This is the OTX plugin; the message previously named Cymon.
        LOGGING.warning('Problem connecting to OTX. Aborting task.')
        LOGGING.exception(sys.exc_info())
        LOGGING.exception(type(e))
        LOGGING.exception(e.args)
        LOGGING.exception(e)

    return []
def get_malwareurl_list():
    """Produce a list of malware URLs from the URLhaus feed.

    Downloads the URLhaus "online" CSV dump, drops its '#' comment lines and
    walks the remaining rows. A row contributes a MalwareUrl when it is not
    marked 'offline', its URL survives clean_url(), its date is newer than
    BASECONFIG.malware_days days ago and its host resolves to an IP address.

    Returns:
    - url_list: (type: MalwareUrl list) list of malware URLs. An empty list
      is returned when the feed answers with a non-200 status or when an
      unexpected error is raised.
    """
    try:
        user_agent = {'User-agent': BASECONFIG.user_agent}

        LOGGING.info('Fetching latest URLhaus list...')

        request = requests.get(
            'https://urlhaus.abuse.ch/downloads/csv_online/',
            headers=user_agent)

        if request.status_code == 200:
            LOGGING.info('Processing URLhaus list...')

            url_list = []

            # Build a filtered copy: removing items from a list while
            # iterating over it skips the element after each removal, which
            # left every second consecutive comment line in place.
            lines = [line for line in request.text.split('\n')
                     if not line.startswith('#')]

            reader = csv.reader(
                lines,
                quotechar='"',
                delimiter=',',
                quoting=csv.QUOTE_ALL,
                skipinitialspace=True)

            # The cut-off does not change between rows, so compute it once.
            # NOTE(review): this is naive local time; confirm it matches the
            # time zone of the feed's date column.
            valid_since = datetime.now() - timedelta(
                days=BASECONFIG.malware_days)

            for item in reader:
                # Columns 1-3 (date, URL, status) are read below; blank or
                # truncated rows are ignored.
                if len(item) < 4:
                    continue

                if item[3] == 'offline':
                    continue

                url = clean_url(item[2])

                if not url:
                    continue

                try:
                    date = dateutil.parser.parse(item[1])

                except (ValueError, OverflowError):
                    # Not a data row (e.g. an uncommented column-header
                    # row). Skipping it replaces the former unconditional
                    # next(reader), which would discard a real record now
                    # that all comment lines are filtered out.
                    continue

                if date <= valid_since:
                    # Assumes the feed is ordered newest-first, so nothing
                    # after this row can be recent enough - TODO confirm.
                    break

                host_name = get_host_from_url(url)
                ip_addr = resolve_dns(host_name)

                if ip_addr:
                    LOGGING.info(
                        'Discovered malicious URL: {0}'.format(url))

                    url_list.append(
                        MalwareUrl(host_name, ip_addr, url, NAME))

            return url_list

        else:
            LOGGING.error(
                'Problem connecting to URLhaus. Status code:{0}. Please try again later.'
                .format(request.status_code))

    except requests.exceptions.ConnectionError as e:
        LOGGING.warning('Problem connecting to URLhaus. Error: {0}'.format(e))

    except Exception as e:
        LOGGING.warning('Problem connecting to URLhaus. Aborting task.')
        LOGGING.exception(sys.exc_info())
        LOGGING.exception(type(e))
        LOGGING.exception(e.args)
        LOGGING.exception(e)

    return []
def get_otx_data():
    """Produce lists of IP addresses, domains and URLs from the OTX feed.

    Queries AlienVault OTX for pulses from the last BASECONFIG.malware_days
    days, ignores pulses created more than STALE_DAYS days ago and sorts the
    indicators of the remaining pulses into three lists.

    Returns:
    - ip_list: (type: string list) list of IP addresses.
    - domain_list: (type: string list) list of domains.
    - url_list: (type: MalwareUrl list) list of malware URLs whose host
      resolved to an IP address.

    Three empty lists are returned when any error is raised.
    """
    # Indicator types that carry a host (domain name or IP address).
    host_indicators = ('domain', 'hostname', 'IPv4')

    try:
        LOGGING.info('Querying AlienVault OTX for recent pulses...')

        now = datetime.utcnow()

        otx = OTXv2(API_KEY)
        pulses = otx.getsince(
            (now - timedelta(days=BASECONFIG.malware_days)).isoformat(),
            limit=None)

        stale_since = now - timedelta(days=STALE_DAYS)

        domain_list = []
        ip_list = []
        url_list = []

        LOGGING.info('Processing OTX pulses...')

        for pulse in pulses:
            if dateutil.parser.parse(pulse['created']) < stale_since:
                LOGGING.warning(
                    'Pulse added more than {0} days ago: {1} ({2})'.format(
                        str(STALE_DAYS), pulse['name'], pulse['id']))
                continue

            for indicator in pulse['indicators']:
                indicator_type = indicator['type']
                value = indicator['indicator']

                if indicator_type == 'URL':
                    url = clean_url(value)

                    # Skip URLs that clean_url rejected or emptied.
                    if not url:
                        continue

                    host_name = get_host_from_url(url)
                    ip_addr = resolve_dns(host_name)

                    if ip_addr:
                        LOGGING.info(
                            'Discovered malicious URL: {0}'.format(url))

                        url_list.append(
                            MalwareUrl(host_name, ip_addr, url, NAME))

                elif indicator_type in host_indicators:
                    # Classify by the value itself rather than the declared
                    # type: anything that is not a valid IPv4 address is
                    # treated as a domain.
                    if validators.ipv4(value):
                        ip_list.append(value)

                    else:
                        domain_list.append(value)

        return ip_list, domain_list, url_list

    except Exception as e:
        # This is the OTX plugin; the message previously named Cymon.
        LOGGING.warning('Problem connecting to OTX. Aborting task.')
        LOGGING.exception(sys.exc_info())
        LOGGING.exception(type(e))
        LOGGING.exception(e.args)
        LOGGING.exception(e)

    return [], [], []
def get_cymon_feed(jwt, feed_id, pages):
    """Collect malware URLs for the hosts reported by one Cymon feed.

    The feed is read page by page for the window between
    BASECONFIG.malware_days days ago and today. IP addresses are taken
    directly from the entries, host names are resolved first; the distinct
    addresses are then handed to get_urls_for_ip().

    Params:
    - jwt: (type: string) JWT token.
    - feed_id: (type: string) Cymon feed ID.
    - pages: (type: int) number of pages to retrieve.

    Returns:
    - url_list: (type: MalwareUrl list) list of malware URLs. An empty list
      is returned when no hosts were found or when an error is raised.
    """
    try:
        now = datetime.utcnow()
        window_start = now - timedelta(days=BASECONFIG.malware_days)

        start_date = window_start.strftime('%Y-%m-%d')
        end_date = now.strftime('%Y-%m-%d')

        auth_header = {'Authorization': 'Bearer {0}'.format(jwt)}
        feed_endpoint = 'https://api.cymon.io/v2/ioc/search/feed/{0}'.format(
            feed_id)

        LOGGING.info('Fetching data from Cymon feed: {0}'.format(feed_id))

        hosts = []

        for page_no in range(1, pages + 1):
            query = {
                'startDate': start_date,
                'endDate': end_date,
                'size': BATCH_SIZE,
                # Offset of the first record on this page.
                'from': BATCH_SIZE * (page_no - 1)}

            # NOTE(review): verify=False turns off TLS certificate checks
            # while the bearer token is being sent - confirm this is still
            # required.
            reply = requests.get(
                feed_endpoint,
                params=query,
                headers=auth_header,
                verify=False)

            # A failed page is reported; the remaining pages are still tried.
            if reply.status_code != 200:
                LOGGING.error(
                    'Problem connecting to Cymon. Status code:{0}. Please try again later.'.format(
                        reply.status_code))
                continue

            LOGGING.info('Request successful!')

            body = json.loads(reply.text)

            if 'hits' not in body:
                continue

            for hit in body['hits']:
                if 'ioc' not in hit:
                    continue

                ioc = hit['ioc']

                if 'ip' in ioc:
                    candidate = ioc['ip']

                elif 'hostname' in ioc:
                    candidate = resolve_dns(ioc['hostname'])

                    # Unresolvable host names contribute nothing.
                    if not candidate:
                        continue

                else:
                    continue

                if candidate not in hosts:
                    hosts.append(candidate)

        if len(hosts) == 0:
            LOGGING.warning('No hosts of interest.')
            return []

        collected = []

        for host_ip in hosts:
            host_urls = get_urls_for_ip(host_ip, 'Cymon')

            if len(host_urls) > 0:
                collected.extend(host_urls)

        return collected

    except requests.exceptions.ConnectionError as e:
        LOGGING.warning('Problem connecting to Cymon. Error: {0}'.format(e))

    except Exception as e:
        LOGGING.warning('Problem connecting to Cymon. Aborting task.')
        LOGGING.exception(sys.exc_info())
        LOGGING.exception(type(e))
        LOGGING.exception(e.args)
        LOGGING.exception(e)

    return []