def test_ipv4_extract(self):
    """Valid IPv4s (plain and defanged) are extracted intact, in any wrapping;
    invalid dotted strings are never extracted."""
    valid = [
        '127.0.0.1',
        '192.168.255.255',
        '1.1.1.1',
        '1[.]1[.]1[.]1',
        '1(.)1(.)1(.)1',
        '111[.]111[.]111[.]111',
        '111[.]111.111[.]111',
        '111[.111.]111[.111',
        '0.0.0.0',
        '100.100.100.100',
        '200.200.200.200',
        '200.201.210.209',
        '105.105.105.105',
        '250.250.250.250',
        '26.26.26.26',
        '255.255.255.255',
    ]
    wrappers = [_wrap_spaces, _wrap_tabs, _wrap_newlines, _wrap_words, _wrap_nonwords]
    for item in valid:
        # Bare string first, then each surrounding-context wrapper.
        self.assertEqual(list(iocextract.extract_ipv4s(item))[0], item)
        for wrap in wrappers:
            self.assertEqual(list(iocextract.extract_ipv4s(wrap(item)))[0], item)

    invalid = [
        '192.168.1',
        '192.168.a.1',
        '11111.1111.1111.1111',
    ]
    negative_wrappers = [_wrap_spaces, _wrap_tabs, _wrap_newlines]
    for item in invalid:
        self.assertEqual(len(list(iocextract.extract_ipv4s(item))), 0)
        for wrap in negative_wrappers:
            self.assertEqual(len(list(iocextract.extract_ipv4s(wrap(item)))), 0)
def extractIOC(path):
    """Run strings64.exe over the file at *path* and extract IOCs from the
    printable strings.

    :param path: path of the binary to scan
    :return: tuple ``(urls, ipv4s, ipv6s, emails)`` of extracted indicators
    """
    extractor = URLExtract()
    command = 'src\\strings64.exe ' + path
    try:
        out = execute_command(command)
    except Exception:
        # One retry for transient failures; a second failure propagates
        # instead of being hidden by a bare except.
        out = execute_command(command)
    out = out.decode("utf-8").split('\n')
    haystack = str(out)

    extract_url = []
    for url in iocextract.extract_urls(haystack, refang=True, strip=True):
        # Re-validate each candidate with URLExtract; skip non-matches
        # (the original swallowed the IndexError from an empty result).
        candidates = extractor.find_urls(url)
        if candidates:
            extract_url.append(str(candidates[0]).replace("\\r", ""))
    # Deduplicate (order is not preserved, matching the original set() use).
    extract_url = list(set(extract_url))

    ipv4 = list(iocextract.extract_ipv4s(haystack, refang=True))
    ipv6 = list(iocextract.extract_ipv6s(haystack))
    emails = [str(email).replace("\\r", "")
              for email in iocextract.extract_emails(haystack, refang=True)]
    return (extract_url, ipv4, ipv6, emails)
def artifacts(self, raw):
    """Build one 'ip' artifact per unique IPv4 address found in *raw*,
    preserving first-seen order."""
    unique_ips = dict.fromkeys(iocextract.extract_ipv4s(str(raw)))
    return [self.build_artifact('ip', str(addr)) for addr in unique_ips]
def test_refang_ipv4(self):
    """Every defanged variant refangs back to the plain dotted-quad form.

    Uses ``assertEqual``: ``assertEquals`` is a deprecated alias and was
    removed in Python 3.12.
    """
    content_list = [
        '111.111.111.111',
        '111[.]111[.]111[.]111',
        '111(.)111(.)111(.)111',
        '111[.]111[.]111[.]111',
        '111[.]111.111[.]111',
        '111[.111.]111[.111',
    ]
    for content in content_list:
        self.assertEqual(
            list(iocextract.extract_ipv4s(content, refang=True))[0],
            '111.111.111.111')
        self.assertEqual(iocextract.refang_ipv4(content), '111.111.111.111')
def _utility_ioc_extractor_function(self, event, *args, **kwargs):
    """Resilient workflow function: strip HTML from the incoming text and
    extract deduplicated IOCs grouped by type.

    Reads ``incident_id`` and ``text_string`` from kwargs, yields a
    FunctionResult whose payload maps IOC type -> ordered, de-duplicated
    list; yields FunctionError if anything raises.
    """
    results = {}
    results["was_successful"] = False
    try:
        # Get the function parameters:
        incident_id = kwargs.get("incident_id")  # number
        text_string = kwargs.get("text_string")  # text

        log = logging.getLogger(__name__)  # Establish logging

        # Strip HTML and normalize text
        text_string = unicodedata.normalize(
            "NFKD",
            BeautifulSoup(text_string, "html.parser").get_text(' '))

        # Parse IOCs by type from text_string - OrderedDict.fromkeys() preserves order and removes duplicates.
        results["ipv4s"] = list(
            OrderedDict.fromkeys(
                list(iocextract.extract_ipv4s(text_string, refang=True))))
        results["ipv6s"] = list(
            OrderedDict.fromkeys(
                list(iocextract.extract_ipv6s(text_string))))
        # URLs and domains
        results["urls"] = list(
            OrderedDict.fromkeys(
                list(iocextract.extract_urls(text_string, refang=True))))
        # domains only
        results["domains"] = list(
            OrderedDict.fromkeys(
                [urlparse(url).netloc for url in results["urls"]]))
        results["email_addresses"] = list(
            OrderedDict.fromkeys(
                list(iocextract.extract_emails(text_string, refang=True))))
        # domains only (assumes every extracted address contains '@')
        results["email_domains"] = list(
            OrderedDict.fromkeys(
                [email.split('@')[1] for email in results["email_addresses"]]))
        results["md5_hashes"] = list(
            OrderedDict.fromkeys(
                list(iocextract.extract_md5_hashes(text_string))))
        results["sha256_hashes"] = list(
            OrderedDict.fromkeys(
                list(iocextract.extract_sha256_hashes(text_string))))
        results["was_successful"] = True

        # Produce a FunctionResult with the results
        yield FunctionResult(results)
    except Exception:
        yield FunctionError()
def main(inp, out):
    """Accumulate refanged IPv4s from *inp* into the shared common.IPs list
    (skipping duplicates) and write the full list to *out*."""
    for line in inp.readlines():
        for addr in iocextract.extract_ipv4s(line, refang=True):
            if addr in common.IPs:
                print(addr + ' Already in List')
            else:
                common.IPs.append(addr)
                print(addr + ', ')
    out.write('\n#####IPs#####\n\n')
    for addr in common.IPs:
        out.write('"' + addr + '", \n')
def ioc_parse(line):
    """ Use library that can handle defanged formats for IOCs (Indicators of Compromise) """
    params = []
    formatted = line
    # NOTE(review): get_ioc_param(...) appears to return [start, end, ...]
    # offsets of the match within `formatted` -- confirm against its
    # definition. After each match the (possibly stripped) value is spliced
    # back over its original span so later offsets stay consistent.
    for url in iocextract.extract_urls(formatted, strip=True):
        refanged = iocextract.refang_url(url)
        param = get_ioc_param('url', url, formatted)
        param.append(refanged)
        params.append(param)
        formatted = '{}{}{}'.format(formatted[:param[0]], url,
                                    formatted[param[1]:])
    for ip in iocextract.extract_ipv4s(formatted):
        refanged = iocextract.refang_ipv4(ip)
        param = get_ioc_param('ip_address', ip, formatted)
        param.append(refanged)
        params.append(param)
        formatted = '{}{}{}'.format(formatted[:param[0]], ip,
                                    formatted[param[1]:])
    for ip in iocextract.extract_ipv6s(formatted):
        # No refang helper used for IPv6; only offsets are recorded.
        param = get_ioc_param('ip_address', ip, formatted)
        params.append(param)
        formatted = '{}{}{}'.format(formatted[:param[0]], ip,
                                    formatted[param[1]:])
    for email in iocextract.extract_emails(formatted):
        refanged = iocextract.refang_email(email)
        param = get_ioc_param('email', email, formatted)
        param.append(refanged)
        params.append(param)
        formatted = '{}{}{}'.format(formatted[:param[0]], email,
                                    formatted[param[1]:])
    for h in iocextract.extract_hashes(formatted):
        param = get_ioc_param('hash', h, formatted)
        params.append(param)
        formatted = '{}{}{}'.format(formatted[:param[0]], h,
                                    formatted[param[1]:])
    for rule in iocextract.extract_yara_rules(formatted):
        param = get_ioc_param('yara_rule', rule, formatted)
        params.append(param)
        formatted = '{}{}{}'.format(formatted[:param[0]], rule,
                                    formatted[param[1]:])
    return formatted, params
def test_defang_dot(self):
    """Each supported dot-defanging style refangs back to the plain IP."""
    content = "192.168.0.1"
    # Flattened from the grouped style lists; iteration order unchanged.
    styles = ["\.", "(.)", "(.", ".)", "[.]", "[.", ".]"]
    for defang_style in styles:
        defanged = content.replace(".", defang_style)
        extracted = list(iocextract.extract_ipv4s(defanged, refang=True))
        self.assertEqual(len(extracted), 1,
                         "failed defang on: " + defang_style)
        self.assertEqual(extracted[0], content)
def test_defang_unsupported_comma(self):
    """Comma-based defanging styles must never yield exactly one valid IP."""
    content = "192.168.0.1"
    # Flattened from the grouped style lists; iteration order unchanged.
    styles = (
        ["(,(", "(,)", "),(", "),)", "(,", ",(", "),", ",)"]
        + ["[,[", "[,]", "],[", "],]", "[,", ",[", "],", ",]"]
        + ["{,{", "{,}", "},{", "},}", "{,", ",{", "},", ",}"]
    )
    for defang_style in styles:
        defanged = content.replace(".", defang_style)
        extracted = list(iocextract.extract_ipv4s(defanged, refang=True))
        self.assertNotEqual(
            len(extracted), 1,
            "should fail on defanging style : " + defang_style)
def test_defang_unsupported_dot(self):
    """Unsupported dot-defanging styles must never yield exactly one IP."""
    content = "192.168.0.1"
    # Flattened from the grouped style lists; iteration order unchanged.
    styles = (
        ["(.(", ").(", ").)", ".("]
        + ["[.[", "].[", "].]", ".[", "]."]
        + ["{.{", "{.}", "}.{", "}.}", "{.", ".{", "}.", ".}"]
    )
    for defang_style in styles:
        defanged = content.replace(".", defang_style)
        extracted = list(iocextract.extract_ipv4s(defanged, refang=True))
        self.assertNotEqual(
            len(extracted), 1,
            "should fail on defanging style : " + defang_style)
def CapeReporter(values):
    """Query the Cape Sandbox for each input value, classified by IOC type.

    For each entry in *values* the first matching IOC type wins, in the
    same priority order as the original if/elif chain: url, ip, md5,
    sha1, sha256. Entries matching nothing are skipped.

    :param values: iterable of raw user-input strings
    :return: list of ``{'Cape Sandbox': data}`` dicts from allReport()
    """
    # (type tag, extractor) pairs; order encodes match priority.
    extractors = [
        ('url', iocextract.extract_urls),
        ('ip', iocextract.extract_ipv4s),
        ('md5', iocextract.extract_md5_hashes),
        ('sha1', iocextract.extract_sha1_hashes),
        ('sha256', iocextract.extract_sha256_hashes),
    ]
    cape_val = []
    for usrInput in values:
        for argType, extract in extractors:
            matches = list(extract(usrInput))
            if matches:
                for data in allReport(matches[0], argType):
                    cape_val.append({'Cape Sandbox': data})
                break  # first matching type wins, like the if/elif chain
    return cape_val
def artifacts(self, raw):
    """Build artifacts for every URL, IPv4, email address and hash found
    in the raw report text."""
    text = str(raw)
    built = []
    for u in iocextract.extract_urls(text):
        built.append(self.build_artifact('url', str(u)))
    for i in iocextract.extract_ipv4s(text):
        built.append(self.build_artifact('ip', str(i)))
    for e in iocextract.extract_emails(text):
        built.append(self.build_artifact('mail', str(e)))
    for h in iocextract.extract_hashes(text):
        built.append(self.build_artifact('hash', str(h)))
    return built
def extract_text_indicators(username, tweet_id, text):
    """Extract IPv4, hash and URL/domain indicators from a tweet's text,
    recursing into linked ghostbin/pastebin pastes."""
    found = []
    user_id = '@{0}'.format(username)
    tweet_url = 'https://twitter.com/{0}/status/{1}'.format(username, tweet_id)
    try:
        for ip in iocextract.extract_ipv4s(text, refang=True):
            if is_valid_ip(ip):
                found.append(TwitterIndicator(user_id, tweet_url, 'IPv4', ip))
        for hash in iocextract.extract_hashes(text):
            hash_type = get_hash_type(hash)
            if hash_type:
                found.append(
                    TwitterIndicator(user_id, tweet_url, hash_type, hash))
        for url in iocextract.extract_urls(text, refang=True):
            # Paste sites get their contents extracted too.
            if 'ghostbin.com' in url or 'pastebin.com' in url:
                paste_indicators = extract_paste_indicators(username, url)
                if paste_indicators:
                    found.extend(paste_indicators)
            url = apply_url_fixes(url)
            if is_valid_url(url):
                found.append(TwitterIndicator(user_id, tweet_url, 'URL', url))
            elif INCLUDE_DOMAINS and is_valid_domain(url):
                found.append(TwitterIndicator(user_id, tweet_url, 'HOST', url))
    except Exception as ex:
        LOGGER.warning('Exception parsing text: {0}'.format(ex))
    return found
def artifacts(self, raw):
    """Return a single file artifact when a filename is known; otherwise
    mine the raw report text for unique URL, IP and mail artifacts."""
    if self.filename:
        return [self.build_artifact("file", self.filename)]

    # Un-escape quotes before extraction, as in the original.
    text = str(raw).replace('\\"', '"')
    built = []
    for u in set(iocextract.extract_urls(text)):
        built.append(self.build_artifact("url", str(u)))
    for i in set(iocextract.extract_ipv4s(text)):
        built.append(self.build_artifact("ip", str(i)))
    for e in set(iocextract.extract_emails(text)):
        built.append(self.build_artifact("mail", str(e)))
    return built
def test_ipv4(self):
    """A plain dotted-quad address is extracted exactly once, unchanged."""
    content = "192.168.0.1"
    extracted = list(iocextract.extract_ipv4s(content))
    self.assertEqual(len(extracted), 1)
    self.assertEqual(extracted[0], content)
"""Extract IPv4 and URL IOCs from a PDF given with -f/--file."""
import fitz
import iocextract
from optparse import OptionParser

parser = OptionParser(usage='usage: python extractor [-f] file.pdf')
parser.add_option('-f', '--file', dest='filename', help='foo help')
(options, args) = parser.parse_args()
if not options.filename:  # if filename is not given
    parser.error('Filename not given')

doc = fitz.open(options.filename)

# Single pass over the document: the original loaded every page and
# re-extracted its text twice (once per IOC type). Two accumulators keep
# the original output order (all IPv4s first, then all URLs).
ipv4s = []
urls = []
for page in range(doc.pageCount):
    text = doc.loadPage(page).getText("text")
    ipv4s.extend(iocextract.extract_ipv4s(text))
    urls.extend(iocextract.extract_urls(text))

# dict.fromkeys dedupes while preserving first-seen order.
iocs = list(dict.fromkeys(ipv4s + urls))
for i in iocs:
    print(i)
# NOTE(review): this method's source was garbled by text extraction --
# all indentation was collapsed and several string literals were split
# across physical lines (e.g. the f-string broken between
# "comment=f'IOC extraction: " and "{title}'", and the multi-line
# "Name: {name}, ..." format string). The body is preserved byte-for-byte
# below; recover the properly formatted version from version control
# rather than reconstructing it from this copy.
#
# Intent, as far as the visible code shows: dispatch on the configured
# mode (extract_file / extract_url / extract_all / domain / ip / hash) to
# pull IOCs from a file or web page, de-duplicate them against
# self.database, save and upload new ones; when self.feed is set, also
# ingest MalwareBazaar and CIRCL MISP OSINT feeds.
def start(self): self.logging() # Extraction if self.extract_all is not None \ or self.ip is not None \ or self.hash is not None \ or self.domain is not None \ or self.extract_file is not None: self.logger.info( 'Checking the type of extraction will be performed.') if self.extract_file is not None: self.logger.info( f'Obtaining IOC from file: {self.extract_file}') if os.path.exists(self.extract_file): openfile = open(self.extract_file, 'r+') all_text = openfile.read() title = self.extract_file file_name = self.extract_file else: self.logger.error( 'The given directory or file was not found.') elif self.extract_url is not None: self.logger.info( f'Obtaining IOC from WebSite: {self.extract_url}') self.driver.get(self.extract_url) soup = BeautifulSoup(self.driver.page_source, "html.parser") title = soup.find('title').get_text() all_text = self.select_all_text(soup=soup) file_name = self.extract_url if self.extract_all: self.driver.get(self.baseurl) count = 0 for extract_iocs in iocextract.extract_iocs(all_text): if '/' not in extract_iocs \ and '[at]' not in extract_iocs: if len( self.database.compare_ioc( IOC=extract_iocs.replace('[.]', '.'))) == 0: self.database.save_ioc(IOC=extract_iocs.replace( '[.]', '.'), signature=title, tags="Extract from URL", font="Extract", type="IOCS", file_name=file_name) self.uploadIOC( comment=f'IOC extraction: {title}', IOC=extract_iocs.replace('[.]', '.'), count=count, name=extract_iocs.replace('[.]', '.')) count += 1 else: self.logger.debug( f'IOC already registered: {extract_iocs}') elif self.domain: self.driver.get(self.baseurl) count = 0 for extract_urls in iocextract.extract_urls(all_text): if '/' not in extract_urls \ and '[at]' not in extract_urls: if len( self.database.compare_ioc( IOC=extract_urls.replace('[.]', '.'))) == 0: self.database.save_ioc(IOC=extract_urls.replace( '[.]', '.'), signature=title, tags="Extract from URL", font="Extract", type="Domain", file_name=file_name) self.uploadIOC( comment=f'IOC extraction: 
{title}', IOC=extract_urls.replace('[.]', '.'), count=count, name=extract_urls.replace('[.]', '.')) count += 1 else: self.logger.debug( f'IOC already registered: {extract_urls}') elif self.ip: self.driver.get(self.baseurl) count = 0 for extract_ipv4s in iocextract.extract_ipv4s(all_text): if '/' not in extract_ipv4s \ and '[at]' not in extract_ipv4s: if len( self.database.compare_ioc( IOC=extract_ipv4s.replace('[.]', '.'))) == 0: self.database.save_ioc(IOC=extract_ipv4s.replace( '[.]', '.'), signature=title, tags="Extract from URL", font="Extract", type="ipv4", file_name=file_name) self.uploadIOC( comment=f'IOC extraction: {title}', IOC=extract_ipv4s.replace('[.]', '.'), count=count, name=extract_ipv4s.replace('[.]', '.')) count += 1 else: self.logger.debug( f'IOC already registered: {extract_ipv4s}') elif self.hash: self.logger.info('Getting only the Hashes from the site.') self.driver.get(self.baseurl) count = 0 for extract_hashes in iocextract.extract_hashes(all_text): if '/' not in extract_hashes \ and '[at]' not in extract_hashes: if len( self.database.compare_ioc( IOC=extract_hashes.replace('[.]', '.'))) == 0: self.database.save_ioc(IOC=extract_hashes.replace( '[.]', '.'), signature=title, tags="Extract from URL", font="Extract", type="Hash", file_name=file_name) self.uploadIOC( comment=f'IOC extraction: {title}', IOC=extract_hashes.replace('[.]', '.'), count=count, name=extract_hashes.replace('[.]', '.')) count += 1 else: self.logger.debug( f'IOC already registered: {extract_hashes}') if self.feed is not None: # MalwareBaazar count = 0 for iocs in MalwareBaazar().start: if len(self.database.compare_ioc( IOC=iocs['sha256_hash'])) == 0: comment = "Name: {name}, signature: {signature}, tags: {tags}, font: {font}".format( name=iocs['file_name'], signature=iocs['signature'], tags=iocs['tags'], font='Bazaar') self.database.save_ioc(file_name=iocs['file_name'], IOC=iocs['sha256_hash'], signature=iocs['signature'], tags=str(iocs['tags']).replace("'",'') \ 
.replace('[','') \ .replace(']',''), font='Bazaar', type="Hash") self.uploadIOC(comment=comment, IOC=iocs['sha256_hash'], count=count, name=iocs['file_name']) count += 1 else: self.logger.debug( f"IOC already registered: {iocs['sha256_hash']}") # Circl for feed in MISPFeed( url="https://www.circl.lu/doc/misp/feed-osint/").start: request = requests.get(feed, headers={ 'User-Agent': 'Mozilla/5.0' }).json() count = 0 for iocs in request['Event']['Attribute']: if iocs['category'] == 'Payload delivery': if '.' not in iocs['value'] \ and len(iocs['value']) == 32 \ or len(iocs['value']) == 64: if len(self.database.compare_ioc( IOC=iocs['value'])) == 0: comment = "Name: {name}, signature: {signature}, tags: {tags}, font: {font}".format( name=iocs['comment'].split(' ')[0], signature=iocs['category'], tags=iocs['category'], font="Circl") self.database.save_ioc( file_name=iocs['comment'].split(' ')[0], IOC=iocs['value'], signature=iocs['category'], tags=iocs['category'], font="Circl", type="Hash") self.uploadIOC( comment=comment, IOC=iocs['value'], count=count, name=iocs['comment'].split(' ')[0]) count += 1 else: self.logger.debug( f"IOC already registered: {iocs['value']}") elif iocs['category'] == 'External analysis': if 'virustotal' in iocs['value']: hash = iocs['value'].split('/')[4] if len(self.database.compare_ioc(IOC=hash)) == 0: comment = "Name: {name}, signature: {signature}, tags: {tags}, font: {font}".format( name=iocs['comment'].split(' ')[0], signature=iocs['category'], tags=iocs['category'], font="Circl") self.database.save_ioc( file_name=iocs['comment'].split(' ')[0], IOC=hash, signature=iocs['category'], tags=iocs['category'], font="Circl", type="Hash") self.uploadIOC( comment=comment, IOC=iocs['value'], count=count, name=iocs['comment'].split(' ')[0]) count += 1 else: self.logger.debug( f"IOC already registered: {iocs['value']}") elif iocs['category'] == 'Artifacts dropped': hash = iocs['value'] if len(self.database.compare_ioc(IOC=hash)) == 0: comment = 
"Name: {name}, signature: {signature}, tags: {tags}, font: {font}".format( name=iocs['comment'].split(' ')[0], signature=iocs['category'], tags=iocs['category'], font="Circl") self.database.save_ioc( file_name=iocs['comment'].split(' ')[0], IOC=hash, signature=iocs['category'], tags=iocs['category'], font="Circl", type="Hash") self.uploadIOC(comment=comment, IOC=iocs['value'], count=count, name=iocs['comment'].split(' ')[0]) count += 1 else: self.logger.debug( f'IOC already registered: {hash}')
for filename in os.listdir(path): if(count > maximum - 1): break if(filename in skip_files): continue # Extract text from pdf filepath = os.path.join(path, filename) content = convert_pdf_txt(filepath) # Extract Indicators of Compromise from text, recording time extracted_files[filename] = {} extract_start_time = time.time() extracted_files[filename]["urls"] = list(iocextract.extract_urls(content, refang=True)) extracted_files[filename]["email_addresses"] = list(iocextract.extract_emails(content, refang=True)) extracted_files[filename]["ipv4s"] = list(iocextract.extract_ipv4s(content, refang=True)) extracted_files[filename]["ipv6s"] = list(iocextract.extract_ipv6s(content)) extracted_files[filename]["md5s"] = list(iocextract.extract_md5_hashes(content)) extracted_files[filename]["sha1s"] = list(iocextract.extract_sha1_hashes(content)) extracted_files[filename]["sha256s"] = list(iocextract.extract_sha256_hashes(content)) extracted_files[filename]["sha512s"] = list(iocextract.extract_sha512_hashes(content)) extracted_files[filename]["yara"] = list(iocextract.extract_yara_rules(content)) extract_avg_numerator += time.time() - extract_start_time count += 1 process_end_time = time.time() # add some meta info on process run time extracted_files["meta"] = { "tool": "iocextract",