def extractIOC(path):
    """Extract URLs, IP addresses and e-mail addresses from a file's strings.

    The strings64.exe tool is run on ``path`` through ``execute_command``;
    its output is decoded as UTF-8, split into lines, and scanned with
    ``iocextract``.

    Args:
        path: Path appended to the strings64.exe command line.

    Returns:
        A 4-tuple ``(urls, ipv4s, ipv6s, emails)`` of lists. ``urls`` is
        de-duplicated through a set, so its order is not preserved; the
        other three lists keep every match in extraction order.
    """
    extractor = URLExtract()
    # NOTE(review): ``path`` is concatenated into the command string as-is;
    # confirm how execute_command runs it if ``path`` can be untrusted.
    command = 'src\\strings64.exe ' + path
    try:
        out = execute_command(command)
    except Exception:
        # Retry exactly once; a second failure propagates to the caller.
        # KeyboardInterrupt/SystemExit are no longer swallowed here.
        out = execute_command(command)
    lines = out.decode("utf-8").split('\n')

    # The extractors are fed the repr of the line list (as before), which is
    # why carriage returns show up as the two characters "\r" and are
    # stripped from the matches below. Build that text once, not per call.
    text = str(lines)

    extract_url = []
    for url in iocextract.extract_urls(text, refang=True, strip=True):
        found = extractor.find_urls(url)
        if not found:
            # URLExtract did not recognise a URL in this candidate; skip it.
            continue
        extract_url.append(str(found[0]).replace("\\r", ""))
    extract_url = list(set(extract_url))

    ipv4 = list(iocextract.extract_ipv4s(text, refang=True))
    ipv6 = list(iocextract.extract_ipv6s(text))
    emails = [
        str(email).replace("\\r", "")
        for email in iocextract.extract_emails(text, refang=True)
    ]
    return (extract_url, ipv4, ipv6, emails)
def test_ipv6_extract(self):
    """Check which strings ``iocextract.extract_ipv6s`` yields as IPv6.

    Every accepted address must come back unchanged as the first match,
    both bare and when passed through the ``_wrap_*`` helpers. Every
    rejected string must yield no match, bare or wrapped in spaces, tabs
    or newlines.
    """
    def first_match(text):
        return list(iocextract.extract_ipv6s(text))[0]

    def match_count(text):
        return len(list(iocextract.extract_ipv6s(text)))

    accepted = (
        '2001:0db8:85a3:0000:0000:8a2e:0370:7334',
        '2001:db8:85a3:0:0:8a2e:370:7334',
        '2001:db8:85a3::8a2e:370:7334',
        '2001:db8::1',
        '2001:0db8::0001',
        '2001:db8:0:0:0:0:2:1',
        '2001:db8::2:1',
        '2001:db8:0000:1:1:1:1:1',
        '2001:db8:0:1:1:1:1:1',
        '2001:db8::1:0:0:1',
        '2001:db8:1234:0000:0000:0000:0000:0000',
        '2001:db8:1234:ffff:ffff:ffff:ffff:ffff',
        'fe80::1ff:fe23:4567:890a',
    )
    accepted_wrappers = (
        _wrap_spaces,
        _wrap_tabs,
        _wrap_newlines,
        _wrap_nonwords,
    )
    for address in accepted:
        self.assertEqual(first_match(address), address)
        for wrap in accepted_wrappers:
            self.assertEqual(first_match(wrap(address)), address)

    rejected = (
        '192.168.1',
        # Not caught
        '::1',
        '::',
    )
    # The non-word wrapper is deliberately not applied to rejected inputs.
    rejected_wrappers = (_wrap_spaces, _wrap_tabs, _wrap_newlines)
    for candidate in rejected:
        self.assertEqual(match_count(candidate), 0)
        for wrap in rejected_wrappers:
            self.assertEqual(match_count(wrap(candidate)), 0)
def _utility_ioc_extractor_function(self, event, *args, **kwargs):
    """Extract indicators of compromise from a block of (possibly HTML) text.

    Reads ``text_string`` from ``kwargs``, strips HTML with BeautifulSoup,
    applies NFKD normalization and runs the ``iocextract`` extractors over
    the result. The ``incident_id`` keyword is accepted but not used.

    Yields:
        ``FunctionResult(results)`` on success, where ``results`` holds the
        de-duplicated, order-preserving lists ``ipv4s``, ``ipv6s``, ``urls``,
        ``domains``, ``email_addresses``, ``email_domains``, ``md5_hashes``
        and ``sha256_hashes`` plus ``was_successful`` set to True.
        ``FunctionError()`` if any step raises; the exception is logged.
    """
    log = logging.getLogger(__name__)  # Establish logging

    def unique(items):
        # OrderedDict.fromkeys() preserves order and removes duplicates.
        return list(OrderedDict.fromkeys(items))

    results = {}
    results["was_successful"] = False

    try:
        # Get the function parameters:
        text_string = kwargs.get("text_string")  # text

        # Strip HTML and normalize text
        text_string = unicodedata.normalize(
            "NFKD",
            BeautifulSoup(text_string, "html.parser").get_text(' '))

        # Parse IOCs by type from text_string
        results["ipv4s"] = unique(
            iocextract.extract_ipv4s(text_string, refang=True))
        results["ipv6s"] = unique(iocextract.extract_ipv6s(text_string))
        # URLs and domains
        results["urls"] = unique(
            iocextract.extract_urls(text_string, refang=True))
        # domains only
        results["domains"] = unique(
            [urlparse(url).netloc for url in results["urls"]])
        results["email_addresses"] = unique(
            iocextract.extract_emails(text_string, refang=True))
        # domains only
        results["email_domains"] = unique(
            [email.split('@')[1] for email in results["email_addresses"]])
        results["md5_hashes"] = unique(
            iocextract.extract_md5_hashes(text_string))
        results["sha256_hashes"] = unique(
            iocextract.extract_sha256_hashes(text_string))

        results["was_successful"] = True

        # Produce a FunctionResult with the results
        yield FunctionResult(results)
    except Exception:
        # Record the traceback so the failure is diagnosable; the caller
        # still only receives a FunctionError, as before.
        log.exception("IOC extraction failed")
        yield FunctionError()
def artifacts(self, raw):
    """Build one 'ip' artifact per distinct IP address found in ``raw``.

    ``raw`` is converted with ``str()`` and scanned for IPv4 and then IPv6
    addresses. Duplicates within each family are dropped, keeping the first
    occurrence; IPv4 artifacts come before IPv6 artifacts.

    Returns:
        list: the values returned by ``self.build_artifact('ip', address)``.
    """
    v4_matches = list(iocextract.extract_ipv4s(str(raw)))
    v6_matches = list(iocextract.extract_ipv6s(str(raw)))

    collected = []
    for matches in (v4_matches, v6_matches):
        # dict.fromkeys de-duplicates while keeping first-seen order.
        for address in dict.fromkeys(matches):
            collected.append(self.build_artifact('ip', str(address)))
    return collected
def ioc_parse(line):
    """Locate IOCs (Indicators of Compromise) in ``line`` via iocextract.

    iocextract is used because it understands defanged indicator formats.
    Indicators are processed by type in a fixed order: URLs, IPv4
    addresses, IPv6 addresses, e-mail addresses, hashes, YARA rules. For
    each match, ``get_ioc_param(kind, ioc, text)`` supplies a parameter
    list; for URLs, IPv4 addresses and e-mails the refanged value is
    appended to it. The text between ``param[0]`` and ``param[1]`` is then
    replaced by the matched indicator.
    NOTE(review): presumably ``param[0]``/``param[1]`` are start/end
    offsets into the text; confirm against ``get_ioc_param``.

    Returns:
        tuple: ``(formatted, params)``, the rewritten line and the list of
        parameter lists in the order the indicators were processed.
    """
    # (kind passed to get_ioc_param, extractor, refang function or None)
    stages = (
        ('url',
         lambda text: iocextract.extract_urls(text, strip=True),
         iocextract.refang_url),
        ('ip_address', iocextract.extract_ipv4s, iocextract.refang_ipv4),
        ('ip_address', iocextract.extract_ipv6s, None),
        ('email', iocextract.extract_emails, iocextract.refang_email),
        ('hash', iocextract.extract_hashes, None),
        ('yara_rule', iocextract.extract_yara_rules, None),
    )

    formatted = line
    params = []
    for kind, extract, refang in stages:
        # Each extractor is started on the text as it stands when its stage
        # begins; rewrites made during the stage do not restart it.
        for ioc in extract(formatted):
            if refang is not None:
                refanged = refang(ioc)
                param = get_ioc_param(kind, ioc, formatted)
                param.append(refanged)
            else:
                param = get_ioc_param(kind, ioc, formatted)
            params.append(param)
            head = formatted[:param[0]]
            tail = formatted[param[1]:]
            formatted = '{}{}{}'.format(head, ioc, tail)

    return formatted, params
def test_ipv6(self):
    """A lone full-form IPv6 address is extracted once and unchanged."""
    address = "2001:0db8:3c4d:0015:0000:0000:1a2f:1a2b"
    extracted = list(iocextract.extract_ipv6s(address))

    self.assertEqual(len(extracted), 1)
    self.assertEqual(extracted[0], address)
if(count > maximum - 1): break if(filename in skip_files): continue # Extract text from pdf filepath = os.path.join(path, filename) content = convert_pdf_txt(filepath) # Extract Indicators of Compromise from text, recording time extracted_files[filename] = {} extract_start_time = time.time() extracted_files[filename]["urls"] = list(iocextract.extract_urls(content, refang=True)) extracted_files[filename]["email_addresses"] = list(iocextract.extract_emails(content, refang=True)) extracted_files[filename]["ipv4s"] = list(iocextract.extract_ipv4s(content, refang=True)) extracted_files[filename]["ipv6s"] = list(iocextract.extract_ipv6s(content)) extracted_files[filename]["md5s"] = list(iocextract.extract_md5_hashes(content)) extracted_files[filename]["sha1s"] = list(iocextract.extract_sha1_hashes(content)) extracted_files[filename]["sha256s"] = list(iocextract.extract_sha256_hashes(content)) extracted_files[filename]["sha512s"] = list(iocextract.extract_sha512_hashes(content)) extracted_files[filename]["yara"] = list(iocextract.extract_yara_rules(content)) extract_avg_numerator += time.time() - extract_start_time count += 1 process_end_time = time.time() # add some meta info on process run time extracted_files["meta"] = { "tool": "iocextract", "files_examined": count,