Example #1
0
def extractIOC(path):
    extractor = URLExtract()
    try:
        out = execute_command('src\\strings64.exe ' + path)
    except:
        out = execute_command('src\\strings64.exe ' + path)
    out = out.decode("utf-8").split('\n')
    extract_url = []
    ipv4 = []
    ipv6 = []
    emails = []
    for url in iocextract.extract_urls(str(out), refang=True, strip=True):
        n = extractor.find_urls(url)
        try:
            n = n[0]
            n = str(n).replace("\\r", "")
            extract_url.append(n)
        except:
            pass
    extract_url = list(set(extract_url))
    for ip4 in iocextract.extract_ipv4s(str(out), refang=True):
        ipv4.append(ip4)
    for ip6 in iocextract.extract_ipv6s(str(out)):
        ipv6.append(ip6)
    for email in iocextract.extract_emails(str(out), refang=True):
        emails.append(str(email).replace("\\r", ""))
    return (extract_url, ipv4, ipv6, emails)
Example #2
0
    def test_ipv6_extract(self):

        content_list = [
            '2001:0db8:85a3:0000:0000:8a2e:0370:7334',
            '2001:db8:85a3:0:0:8a2e:370:7334',
            '2001:db8:85a3::8a2e:370:7334',
            '2001:db8::1',
            '2001:0db8::0001',
            '2001:db8:0:0:0:0:2:1',
            '2001:db8::2:1',
            '2001:db8:0000:1:1:1:1:1',
            '2001:db8:0:1:1:1:1:1',
            '2001:db8::1:0:0:1',
            '2001:db8:1234:0000:0000:0000:0000:0000',
            '2001:db8:1234:ffff:ffff:ffff:ffff:ffff',
            'fe80::1ff:fe23:4567:890a',
        ]

        for content in content_list:
            self.assertEqual(
                list(iocextract.extract_ipv6s(content))[0], content)
            self.assertEqual(
                list(iocextract.extract_ipv6s(_wrap_spaces(content)))[0],
                content)
            self.assertEqual(
                list(iocextract.extract_ipv6s(_wrap_tabs(content)))[0],
                content)
            self.assertEqual(
                list(iocextract.extract_ipv6s(_wrap_newlines(content)))[0],
                content)
            self.assertEqual(
                list(iocextract.extract_ipv6s(_wrap_nonwords(content)))[0],
                content)

        invalid_list = [
            '192.168.1',
            # Not caught
            '::1',
            '::',
        ]

        for content in invalid_list:
            self.assertEqual(len(list(iocextract.extract_ipv6s(content))), 0)
            self.assertEqual(
                len(list(iocextract.extract_ipv6s(_wrap_spaces(content)))), 0)
            self.assertEqual(
                len(list(iocextract.extract_ipv6s(_wrap_tabs(content)))), 0)
            self.assertEqual(
                len(list(iocextract.extract_ipv6s(_wrap_newlines(content)))),
                0)
    def _utility_ioc_extractor_function(self, event, *args, **kwargs):

        results = {}
        results["was_successful"] = False

        try:
            # Get the function parameters:
            incident_id = kwargs.get("incident_id")  # number
            text_string = kwargs.get("text_string")  # text

            log = logging.getLogger(__name__)  # Establish logging

            text_string = unicodedata.normalize(
                "NFKD",
                BeautifulSoup(text_string, "html.parser").get_text(
                    ' '))  # Strip HTML and normalize text

            # Parse IOCs by type from text_string - OrderedDict.fromkeys() preserves order and removes duplicates.
            results["ipv4s"] = list(
                OrderedDict.fromkeys(
                    list(iocextract.extract_ipv4s(text_string, refang=True))))
            results["ipv6s"] = list(
                OrderedDict.fromkeys(
                    list(iocextract.extract_ipv6s(text_string))))
            results["urls"] = list(
                OrderedDict.fromkeys(
                    list(iocextract.extract_urls(
                        text_string, refang=True))))  # URLs and domains
            results["domains"] = list(
                OrderedDict.fromkeys([
                    urlparse(url).netloc for url in results["urls"]
                ]))  # domains only
            results["email_addresses"] = list(
                OrderedDict.fromkeys(
                    list(iocextract.extract_emails(text_string, refang=True))))
            results["email_domains"] = list(
                OrderedDict.fromkeys([
                    email.split('@')[1] for email in results["email_addresses"]
                ]))  # domains only
            results["md5_hashes"] = list(
                OrderedDict.fromkeys(
                    list(iocextract.extract_md5_hashes(text_string))))
            results["sha256_hashes"] = list(
                OrderedDict.fromkeys(
                    list(iocextract.extract_sha256_hashes(text_string))))
            results["was_successful"] = True

            # Produce a FunctionResult with the results
            yield FunctionResult(results)
        except Exception:
            yield FunctionError()
Example #4
0
    def artifacts(self, raw):
        artifacts = []
        ipv4s = list(iocextract.extract_ipv4s(str(raw)))
        ipv6s = list(iocextract.extract_ipv6s(str(raw)))

        if ipv4s:
            ipv4s = list(dict.fromkeys(ipv4s))
            for i in ipv4s:
                artifacts.append(self.build_artifact('ip', str(i)))

        if ipv6s:
            ipv6s = list(dict.fromkeys(ipv6s))
            for j in ipv6s:
                artifacts.append(self.build_artifact('ip', str(j)))

        return artifacts
Example #5
0
def ioc_parse(line):
    """ Use library that can handle defanged formats for IOCs (Indicators of Compromise) """
    params = []
    formatted = line
    for url in iocextract.extract_urls(formatted, strip=True):
        refanged = iocextract.refang_url(url)
        param = get_ioc_param('url', url, formatted)
        param.append(refanged)
        params.append(param)
        formatted = '{}{}{}'.format(formatted[:param[0]], url,
                                    formatted[param[1]:])

    for ip in iocextract.extract_ipv4s(formatted):
        refanged = iocextract.refang_ipv4(ip)
        param = get_ioc_param('ip_address', ip, formatted)
        param.append(refanged)
        params.append(param)
        formatted = '{}{}{}'.format(formatted[:param[0]], ip,
                                    formatted[param[1]:])

    for ip in iocextract.extract_ipv6s(formatted):
        param = get_ioc_param('ip_address', ip, formatted)
        params.append(param)
        formatted = '{}{}{}'.format(formatted[:param[0]], ip,
                                    formatted[param[1]:])

    for email in iocextract.extract_emails(formatted):
        refanged = iocextract.refang_email(email)
        param = get_ioc_param('email', email, formatted)
        param.append(refanged)
        params.append(param)
        formatted = '{}{}{}'.format(formatted[:param[0]], email,
                                    formatted[param[1]:])

    for h in iocextract.extract_hashes(formatted):
        param = get_ioc_param('hash', h, formatted)
        params.append(param)
        formatted = '{}{}{}'.format(formatted[:param[0]], h,
                                    formatted[param[1]:])

    for rule in iocextract.extract_yara_rules(formatted):
        param = get_ioc_param('yara_rule', rule, formatted)
        params.append(param)
        formatted = '{}{}{}'.format(formatted[:param[0]], rule,
                                    formatted[param[1]:])

    return formatted, params
Example #6
0
 def test_ipv6(self):
     content = "2001:0db8:3c4d:0015:0000:0000:1a2f:1a2b"
     result = list(iocextract.extract_ipv6s(content))
     self.assertEqual(len(result), 1)
     self.assertEqual(result[0], content)
Example #7
0
            if(count > maximum - 1):
                break
            if(filename in skip_files):
                continue

            # Extract text from pdf
            filepath = os.path.join(path, filename)
            content = convert_pdf_txt(filepath)
            
            # Extract Indicators of Compromise from text, recording time
            extracted_files[filename] = {}
            extract_start_time = time.time()
            extracted_files[filename]["urls"] = list(iocextract.extract_urls(content, refang=True))
            extracted_files[filename]["email_addresses"] = list(iocextract.extract_emails(content, refang=True))
            extracted_files[filename]["ipv4s"] = list(iocextract.extract_ipv4s(content, refang=True))
            extracted_files[filename]["ipv6s"] = list(iocextract.extract_ipv6s(content))
            extracted_files[filename]["md5s"] = list(iocextract.extract_md5_hashes(content))
            extracted_files[filename]["sha1s"] = list(iocextract.extract_sha1_hashes(content))
            extracted_files[filename]["sha256s"] = list(iocextract.extract_sha256_hashes(content))
            extracted_files[filename]["sha512s"] = list(iocextract.extract_sha512_hashes(content))
            extracted_files[filename]["yara"] = list(iocextract.extract_yara_rules(content))
            extract_avg_numerator += time.time() - extract_start_time
            
            count += 1

        process_end_time = time.time()

        # add some meta info on process run time
        extracted_files["meta"] = {
            "tool": "iocextract",
            "files_examined": count,