def get_malwareurl_list():
    """Produce a list of malware URLs from the MalShare feed.

    Returns:
    - url_list: (type: MalwareUrl list) list of malware URLs.
    """
    try:
        query = {'action': 'getsourcesraw', 'api_key': API_KEY}
        headers = {'User-agent': BASECONFIG.user_agent}

        LOGGING.info('Fetching latest MalShare list...')

        response = requests.get('https://malshare.com/api.php',
                                params=query,
                                headers=headers)

        if response.status_code != 200:
            LOGGING.error(
                'Problem connecting to MalShare. Status code:{0}. Please try again later.'
                .format(response.status_code))
            return []

        LOGGING.info('Processing MalShare list...')

        url_list = []

        # The feed is one URL per line; blank/unusable lines clean to
        # None or '' and are skipped.
        for raw_line in response.text.split('\n'):
            url = clean_url(raw_line.strip())

            if not url:
                continue

            host_name = get_host_from_url(url)
            ip_addr = resolve_dns(host_name)

            # Only keep URLs whose host still resolves.
            if not ip_addr:
                continue

            LOGGING.info('Discovered malicious URL: {0}'.format(url))
            url_list.append(MalwareUrl(host_name, ip_addr, url, 'Malshare'))

        return url_list
    except requests.exceptions.ConnectionError as e:
        LOGGING.warning('Problem connecting to Malshare. Error: {0}'.format(e))
    except Exception as e:
        LOGGING.warning('Problem connecting to Malshare. Aborting task.')
        LOGGING.exception(sys.exc_info())
        LOGGING.exception(type(e))
        LOGGING.exception(e.args)
        LOGGING.exception(e)

    return []
def get_malwareurl_list():
    """Produce a list of malware URLs from the URLhaus feed.

    Returns:
    - url_list: (type: MalwareUrl list) list of malware URLs.
    """
    try:
        user_agent = {'User-agent': BASECONFIG.user_agent}

        LOGGING.info('Fetching latest URLhaus list...')

        request = requests.get(
            'https://urlhaus.abuse.ch/downloads/csv_online/',
            headers=user_agent)

        if request.status_code == 200:
            LOGGING.info('Processing URLhaus list...')

            url_list = []

            # BUG FIX: the original removed comment lines from the list
            # while iterating over it (for line in lines: lines.remove(line)),
            # which skips the element following each removal and leaves every
            # other consecutive '#' line behind. Build a filtered list
            # instead. The feed's header row is itself a '#'-prefixed
            # comment, so the old next(reader) header skip is no longer
            # needed (it would now discard the first data row).
            lines = [
                line for line in request.text.split('\n')
                if not line.startswith('#')
            ]

            reader = csv.reader(lines,
                                quotechar='"',
                                delimiter=',',
                                quoting=csv.QUOTE_ALL,
                                skipinitialspace=True)

            # Loop-invariant: compute the staleness cutoff once, not per row.
            valid_since = datetime.now() - timedelta(
                days=BASECONFIG.malware_days)

            for item in reader:
                if len(item) > 1:
                    # Feed columns: id, dateadded, url, url_status, ...
                    if item[3] == 'offline':
                        continue

                    url = clean_url(item[2])

                    if url is None or len(url) == 0:
                        continue

                    date = dateutil.parser.parse(item[1])

                    if date > valid_since:
                        host_name = get_host_from_url(url)
                        ip_addr = resolve_dns(host_name)

                        if ip_addr:
                            LOGGING.info(
                                'Discovered malicious URL: {0}'.format(url))
                            url_list.append(
                                MalwareUrl(host_name, ip_addr, url, NAME))
                    else:
                        # Feed is ordered newest-first; stop at the first
                        # stale entry.
                        break

            return url_list

        LOGGING.error(
            'Problem connecting to URLhaus. Status code:{0}. Please try again later.'
            .format(request.status_code))
    except requests.exceptions.ConnectionError as e:
        LOGGING.warning('Problem connecting to URLhaus. Error: {0}'.format(e))
    except Exception as e:
        LOGGING.warning('Problem connecting to URLhaus. Aborting task.')
        LOGGING.exception(sys.exc_info())
        LOGGING.exception(type(e))
        LOGGING.exception(e.args)
        LOGGING.exception(e)

    return []
def get_otx_data():
    """Produce a list of IP addresses, domains and URLs from the OTX feed.

    Returns:
    - ip_list: (type: string list) list of IP addresses.
    - domain_list: (type: string list) list of domains.
    - url_list: (type: MalwareUrl list) list of malware URLs.
    """
    try:
        LOGGING.info('Querying AlienVault OTX for recent pulses...')

        otx = OTXv2(API_KEY)
        pulses = otx.getsince(
            (datetime.utcnow() -
             timedelta(days=BASECONFIG.malware_days)).isoformat(),
            limit=None)

        # Pulses created before this cutoff are logged and skipped.
        stale_since = datetime.utcnow() - timedelta(days=STALE_DAYS)

        domain_list = []
        ip_list = []
        url_list = []

        LOGGING.info('Processing OTX pulses...')

        # Indicator types that describe a host rather than a full URL.
        host_indicators = ('domain', 'hostname', 'IPv4')

        for pulse in pulses:
            if dateutil.parser.parse(pulse['created']) < stale_since:
                LOGGING.warning(
                    'Pulse added more than {0} days ago: {1} ({2})'.format(
                        str(STALE_DAYS), pulse['name'], pulse['id']))
                continue

            # Iterating an empty list is a no-op, so the original
            # len(indicators) > 0 wrapper was redundant.
            for indicator in pulse['indicators']:
                if indicator['type'] == 'URL':
                    url = clean_url(indicator['indicator'])

                    if url is None:
                        continue

                    host_name = get_host_from_url(url)
                    ip_addr = resolve_dns(host_name)

                    if ip_addr:
                        LOGGING.info(
                            'Discovered malicious URL: {0}'.format(url))
                        url_list.append(
                            MalwareUrl(host_name, ip_addr, url, NAME))

                if indicator['type'] in host_indicators:
                    if validators.ipv4(indicator['indicator']):
                        ip_list.append(indicator['indicator'])
                    else:
                        domain_list.append(indicator['indicator'])

        return ip_list, domain_list, url_list
    except Exception as e:
        # BUG FIX: the original message referenced 'Cymon' (a different
        # feed) — this task talks to AlienVault OTX.
        LOGGING.warning('Problem connecting to OTX. Aborting task.')
        LOGGING.exception(sys.exc_info())
        LOGGING.exception(type(e))
        LOGGING.exception(e.args)
        LOGGING.exception(e)

    return [], [], []
def get_malwareurl_list():
    """Produce a list of malware URLs from the CleanMX virus feed.

    Returns:
    - url_list: (type: MalwareUrl list) list of malware URLs.
    """
    try:
        url_list = []

        LOGGING.info('Fetching Clean MX virus data...')

        feedparser.USER_AGENT = USER_AGENT
        feed = feedparser.parse(
            'http://support.clean-mx.com/clean-mx/rss?scope=viruses')

        LOGGING.info('Clean MX request OK.')

        # Entries older than this cutoff are out of scope.
        valid_since = datetime.now(tz=timezone.utc) - timedelta(
            days=BASECONFIG.malware_days)

        for entry in feed.entries:
            if not hasattr(entry, 'published'):
                LOGGING.warning('Encountered incomplete entry in feed.')
                continue

            # Feed is ordered newest-first: stop at the first stale entry.
            if dateutil.parser.parse(entry.published) <= valid_since:
                break

            details = entry.description.strip()

            # The description embeds tab-separated url/ip fields terminated
            # by <br /> tags.
            url_match = re.search(r'url:\t(.*?)<br />', details)
            ip_match = re.search(
                r'ip:\t(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})<br />', details)

            if not url_match:
                LOGGING.warning('Encountered invalid line in feed.')
                continue

            url = clean_url(url_match.group(1))

            if url is None:
                continue

            host_name = get_host_from_url(url)

            if not ip_match:
                LOGGING.warning('Encountered invalid line in feed.')
                continue

            ip_addr = ip_match.group(1)

            LOGGING.info('Discovered malicious URL: {0}'.format(url))
            url_list.append(MalwareUrl(host_name, ip_addr, url, 'Clean MX'))

        return url_list
    except Exception as e:
        LOGGING.warning('Problem connecting to Clean MX. Aborting task.')
        LOGGING.exception(sys.exc_info())
        LOGGING.exception(type(e))
        LOGGING.exception(e.args)
        LOGGING.exception(e)

    return []