def _sniff_text(text):
    """ checks every regex for findings, and return a dictionary of all findings """
    results = {}
    if (args.ioc):
        print("")
        # IOC mode: run every iocextract category over the text and keep
        # only the non-empty result lists (insertion order preserved).
        extracted = {
            "urls": list(iocextract.extract_urls(text)),
            "ips": list(iocextract.extract_ips(text)),
            "emails": list(iocextract.extract_emails(text)),
            "hashes": list(iocextract.extract_hashes(text)),
            "rules": list(iocextract.extract_yara_rules(text)),
        }
        for category, findings in extracted.items():
            if findings:
                results[category] = findings
    else:
        # Regex mode: apply every configured pattern; store unique matches.
        for name, pattern in regexList.items():
            matches = set(re.findall(pattern, text))
            if matches:
                results[name] = matches
    return results
예제 #2
0
def main(inp, out):
    """Read hashes from *inp*, dedupe against common.Hashes, dump to *out*."""
    for line in inp.readlines():
        for extracted in iocextract.extract_hashes(line):
            # Only collect hashes we have not seen before.
            if extracted in common.Hashes:
                print(extracted + ' Already in List')
            else:
                common.Hashes.append(extracted)
                print(extracted + ', ')

    out.write('#####HASHES#####\n\n')
    for known in common.Hashes:
        out.write('"' + known + '", \n')
예제 #3
0
def ioc_parse(line):
    """ Use library that can handle defanged formats for IOCs (Indicators of Compromise) """
    # Each entry in `params` comes from get_ioc_param(type, match, text);
    # param[0]/param[1] are used as slice offsets below, so they are assumed
    # to be the match's start/end positions in `formatted` -- TODO confirm
    # against get_ioc_param's definition (not visible in this file).
    params = []
    formatted = line
    # URLs: record the refanged form, then splice the original (possibly
    # defanged) match back into `formatted` at the same offsets so later
    # extractor passes see consistent text.
    for url in iocextract.extract_urls(formatted, strip=True):
        refanged = iocextract.refang_url(url)
        param = get_ioc_param('url', url, formatted)
        param.append(refanged)
        params.append(param)
        formatted = '{}{}{}'.format(formatted[:param[0]], url,
                                    formatted[param[1]:])

    # IPv4 addresses, with refanged value appended to the param.
    for ip in iocextract.extract_ipv4s(formatted):
        refanged = iocextract.refang_ipv4(ip)
        param = get_ioc_param('ip_address', ip, formatted)
        param.append(refanged)
        params.append(param)
        formatted = '{}{}{}'.format(formatted[:param[0]], ip,
                                    formatted[param[1]:])

    # IPv6 addresses: no refang step here (no refang helper is used), so
    # these params carry no refanged element.
    for ip in iocextract.extract_ipv6s(formatted):
        param = get_ioc_param('ip_address', ip, formatted)
        params.append(param)
        formatted = '{}{}{}'.format(formatted[:param[0]], ip,
                                    formatted[param[1]:])

    # Email addresses, with refanged value appended.
    for email in iocextract.extract_emails(formatted):
        refanged = iocextract.refang_email(email)
        param = get_ioc_param('email', email, formatted)
        param.append(refanged)
        params.append(param)
        formatted = '{}{}{}'.format(formatted[:param[0]], email,
                                    formatted[param[1]:])

    # Hashes: no refanging needed.
    for h in iocextract.extract_hashes(formatted):
        param = get_ioc_param('hash', h, formatted)
        params.append(param)
        formatted = '{}{}{}'.format(formatted[:param[0]], h,
                                    formatted[param[1]:])

    # YARA rules: no refanging needed.
    for rule in iocextract.extract_yara_rules(formatted):
        param = get_ioc_param('yara_rule', rule, formatted)
        params.append(param)
        formatted = '{}{}{}'.format(formatted[:param[0]], rule,
                                    formatted[param[1]:])

    # Returns the reassembled line plus one param entry per IOC found.
    return formatted, params
예제 #4
0
    def test_hash_extract(self):
        """MD5/SHA1/SHA256/SHA512 samples are found by both extract_hashes
        and the generic extract_iocs, in document order."""
        content = """
            68b329da9893e34099c7d8ad5cb9c940
            adc83b19e793491b1c6ea0fd8b46cd9f32e592fc
            01ba4719c80b6fe911b091a7c05124b64eeece964e09c058ef8f9805daca546b
            be688838ca8686e5c90689bf2ab585cef1137c999b48c70b92f67a5c34dc15697b5d11c982ed6d71be1e1e7f7b4e0733884aa97c3f7a339a8ed03577cf74be09
        """

        # Both extractors must agree: four hashes, MD5 first.
        for extractor in (iocextract.extract_hashes, iocextract.extract_iocs):
            processed = list(extractor(content))
            self.assertEqual(len(processed), 4)
            self.assertEqual(processed[0], '68b329da9893e34099c7d8ad5cb9c940')
예제 #5
0
def main():
    """Parse FILENAME as STIX and harvest IOCs from the first indicator's
    description into the module-level `iocs` buckets."""
    # Parse input file and convert the STIX package to a plain dict.
    stix_package = STIXPackage.from_xml(FILENAME)
    stix_dict = stix_package.to_dict()

    # Description of the first indicator (suitable for indicator-only feeds).
    description = stix_dict["indicators"][0]["description"]

    # Regex for defanged domains / emails / IP addresses, e.g. a[.]b[.]com.
    raw_iocs = re.findall(
        r'[a-zA-Z0-9-\.]*\[\.?\@?\][a-zA-Z0-9-\.\[\.\@\]]*[-a-zA-Z0-9@:%_\+.~#?&//=]*',
        description)

    print(len(raw_iocs))

    for idx, candidate in enumerate(raw_iocs):
        # Undo the defanging ("on9") substitutions.
        for fanged, plain in on9strings.items():
            candidate = candidate.replace(fanged, plain)
        raw_iocs[idx] = candidate
        # Classify each refanged hit.
        if re.match(r'.*[@]+', candidate):
            iocs['email'].append(candidate)
        elif re.match(r'.*[//].*', candidate):
            iocs['url'].append(candidate)
        elif re.match(r'.*[a-zA-Z]', candidate):
            iocs['domain'].append(candidate)

    # Hashes, YARA rules and IPs come from the dedicated iocextract plugins.
    for extracted in iocextract.extract_hashes(description):
        iocs['hash'].append(extracted)
    for extracted in iocextract.extract_yara_rules(description):
        iocs['yara'].append(extracted)
    for extracted in iocextract.extract_ips(description, refang=True):
        iocs['ip'].append(extracted)

    for key in iocs:
        for item in iocs[key]:
            print(key + ":" + item)
예제 #6
0
    def artifacts(self, raw):
        """Build one artifact per URL / IPv4 / email / hash found in *raw*."""
        text = str(raw)
        artifacts = []
        # (artifact type, extractor) pairs, processed in a fixed order so
        # URLs come first and hashes last.
        extractors = (
            ('url', iocextract.extract_urls),
            ('ip', iocextract.extract_ipv4s),
            ('mail', iocextract.extract_emails),
            ('hash', iocextract.extract_hashes),
        )
        for kind, extractor in extractors:
            for value in extractor(text):
                artifacts.append(self.build_artifact(kind, str(value)))
        return artifacts
예제 #7
0
def extract_text_indicators(username, tweet_id, text):
    """Scan tweet *text* for IPv4s, hashes and URLs and wrap each finding in
    a TwitterIndicator tagged with the author and tweet permalink."""
    found = []

    user_id = '@{0}'.format(username)
    tweet_url = 'https://twitter.com/{0}/status/{1}'.format(username, tweet_id)

    try:
        for ip in iocextract.extract_ipv4s(text, refang=True):
            if is_valid_ip(ip):
                found.append(TwitterIndicator(user_id, tweet_url, 'IPv4', ip))

        # `digest` instead of `hash` to avoid shadowing the builtin.
        for digest in iocextract.extract_hashes(text):
            digest_type = get_hash_type(digest)
            if digest_type:
                found.append(
                    TwitterIndicator(user_id, tweet_url, digest_type, digest))

        for url in iocextract.extract_urls(text, refang=True):
            # Paste-site links get their contents scraped for indicators too.
            if 'ghostbin.com' in url or 'pastebin.com' in url:
                paste_indicators = extract_paste_indicators(username, url)
                if len(paste_indicators) > 0:
                    found.extend(paste_indicators)

            url = apply_url_fixes(url)

            if is_valid_url(url):
                found.append(TwitterIndicator(user_id, tweet_url, 'URL', url))
            elif INCLUDE_DOMAINS and is_valid_domain(url):
                found.append(TwitterIndicator(user_id, tweet_url, 'HOST', url))

    except Exception as ex:
        LOGGER.warning('Exception parsing text: {0}'.format(ex))

    return found
예제 #8
0
def extract(filein, fileout):
    """Extract IPs, URLs and hashes from the PDF *filein* into *fileout*.txt.

    Args:
        filein: path of the PDF to read.
        fileout: output path prefix; ".txt" is appended.

    Fix over the original: both file handles were opened with bare
    open()/close(), leaking them if pdfminer or iocextract raised midway.
    They are now managed with `with` blocks.
    """
    # Set up extraction of text from the PDF.
    rsrcmgr = PDFResourceManager()
    retstr = StringIO()
    codec = 'utf-8'  # 'utf16','utf-8'
    laparams = LAParams()
    device = TextConverter(rsrcmgr, retstr, codec=codec, laparams=laparams)

    # Render every page into the StringIO buffer.
    with open(filein, mode='rb') as f:
        interpreter = PDFPageInterpreter(rsrcmgr, device)
        for page in PDFPage.get_pages(f):
            interpreter.process_page(page)
    device.close()
    text = retstr.getvalue()
    retstr.close()

    # Write each IOC category to the output file, one value per line.
    with open(fileout + ".txt", mode="wb") as fout:
        fout.write(b"=== IP ===\n")
        for ip in iocextract.extract_ips(text, refang=True):
            fout.write(ip.encode("latin-1") + b"\n")

        fout.write(b"=== URL ===\n")
        for url in iocextract.extract_urls(text, refang=True):
            fout.write(url.encode("latin-1") + b"\n")

        fout.write(b"=== Hashes ===\n")
        for _hash in iocextract.extract_hashes(text):
            fout.write(_hash.encode("latin-1") + b"\n")
    return
예제 #9
0
    def each(self, target):
        """Extract IOCs from the target's strings, filter the blacklist,
        store them in self.results and register each as an observable."""
        self.results = dict()

        # One space-separated blob of all printable strings in the target.
        blob = ' '.join(list(_strings(target)))

        # Run every enabled extractor over the blob, in a fixed order.
        extractors = (
            iocextract.extract_ips,
            iocextract.extract_emails,
            iocextract.extract_hashes,
            iocextract.extract_yara_rules,
            # iocextract.extract_urls intentionally left disabled.
        )
        iocs = []
        for extract in extractors:
            iocs.extend(list(extract(blob)))
        iocs[:] = (value for value in iocs if value not in blacklist)

        self.results['iocs'] = iocs

        # Add observables
        for ioc in self.results['iocs']:
            self.add_ioc(ioc)  # TODO: tag
        return True
예제 #10
0
    def check_clippy(iocs):
        """Poll the clipboard once a second and submit any IOCs found.

        Runs forever. Each time the clipboard text changes, it is scanned
        for URLs, IPs, emails and hashes; any findings are de-duplicated
        and pushed to the window on the GTK main loop via GLib.idle_add.
        Note that `iocs` is reset to [] at the end of every cycle, so the
        parameter only seeds the very first submission batch.
        """
        last_text = ''

        while True:
            iocs_found = False
            urls, ips, emails, hashes = None, None, None, None
            # Current clipboard text (None when the clipboard holds no text).
            text = clipboard.wait_for_text()

            # If there's text and it has not already been parsed
            if text is not None and text != last_text:
                # iter_check presumably returns None for empty extractions
                # -- TODO confirm against its definition.
                urls = iter_check(extract_urls(text, refang=True))
                if urls is not None:
                    iocs = iocs + [u for u in urls]
                    iocs_found = True

                ips = iter_check(extract_ips(text, refang=True))
                if ips is not None:
                    iocs = iocs + [i for i in ips]
                    iocs_found = True
                
                emails = iter_check(extract_emails(text, refang=True))
                if emails is not None:
                    iocs = iocs + [e for e in emails]
                    iocs_found = True

                hashes = iter_check(extract_hashes(text))
                if hashes is not None:
                    iocs = iocs + [h for h in hashes]
                    iocs_found = True

                if iocs_found:
                    # Hand the de-duplicated batch to the GUI thread.
                    GLib.idle_add(win.submit_iocs, list(set(iocs)))

            iocs = []
            last_text = text
            time.sleep(1)
    def parse_indicators_from_description_string(self, description_string,
                                                 title):
        """Extract IOCs from a free-text description.

        Args:
            description_string: text to scan for (possibly defanged) IOCs.
            title: stored under the 'title' key of the returned dict.

        Returns:
            dict mapping IOC category ('domain', 'ip', 'email', 'hash',
            'url', 'yara', 'other') to a de-duplicated list of findings.

        Fixes over the original: the dict literal listed 'hash' twice
        (duplicate keys silently collapse); the domain regex was not a raw
        string; a large block of dead commented-out code was removed; the
        per-append list(set(...)) dedup is hoisted to one pass per category.
        """
        iocs = {
            'title': title,
            'domain': [],
            'ip': [],
            'email': [],
            'hash': [],
            'url': [],
            'yara': [],
            'other': []
        }
        # Defang-to-plain translations applied to every regex hit.
        on9strings = {'[.]': '.', 'hxxp': 'http', '[@]': '@'}

        # Pattern for defanged domain / email / IP hits, e.g. a[.]b[.]com.
        raw_iocs = re.findall(
            r'[a-zA-Z0-9-\.]*\[\.?\@?\][a-zA-Z0-9-\.\[\.\@\]]*[-a-zA-Z0-9@:%_\+.~#?&//=]*',
            description_string)

        for i in range(len(raw_iocs)):
            # Replace the on9 (defang) strings in place.
            for on9string in on9strings:
                raw_iocs[i] = raw_iocs[i].replace(on9string,
                                                  on9strings[on9string])

            # Classify the refanged hit into email / url / domain buckets.
            if re.match(r'.*[@]+', raw_iocs[i]):
                iocs['email'].append(raw_iocs[i])
            elif re.match(r'.*[//].*', raw_iocs[i]):
                iocs['url'].append(raw_iocs[i])
            elif re.match(r'.*[a-zA-Z]', raw_iocs[i]):
                # Only keep hits that look like a bare domain name.
                if re.match(r"^([a-z0-9]+(-[a-z0-9]+)*\.)+[a-z]{2,}$",
                            raw_iocs[i]):
                    iocs['domain'].append(raw_iocs[i])

        # Extract hashes by their plugin
        for hash_extracted in iocextract.extract_hashes(description_string):
            iocs['hash'].append(hash_extracted)

        # Extract Yara rules
        for yara_extracted in iocextract.extract_yara_rules(
                description_string):
            iocs['yara'].append(yara_extracted)

        # Extract IPs
        for ip_extracted in iocextract.extract_ips(description_string,
                                                   refang=True):
            # Use regex to validate the IPv4 format before keeping it.
            if re.match(r"\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b",
                        ip_extracted):
                iocs['ip'].append(ip_extracted)

        # De-duplicate every populated category in one pass.
        for category in ('email', 'url', 'domain', 'hash', 'yara', 'ip'):
            iocs[category] = list(set(iocs[category]))

        return iocs
예제 #12
0
    def process_element(self,
                        content,
                        reference_link,
                        include_nonobfuscated=False):
        """Take a single source content/url and return a list of Artifacts"""

        # Reference text is a truncated preview of the content.
        reference_text = content[:TRUNCATE_LENGTH]
        if len(content) > TRUNCATE_LENGTH:
            reference_text += '...'

        artifact_list = []

        # URLs (and their domains, collected in the same pass).
        for url in iocextract.extract_urls(content):
            # dump anything with ellipses, these get through the regex
            if u'\u2026' in url:
                continue

            url_artifact = threatingestor.artifacts.URL(
                url,
                self.name,
                reference_link=reference_link,
                reference_text=reference_text)

            # dump urls that appear to have the same domain as reference_url
            if url_artifact.domain() == urlparse(reference_link).netloc:
                continue

            if url_artifact.is_obfuscated() or include_nonobfuscated:
                artifact_list.append(url_artifact)

                if url_artifact.is_domain():
                    artifact_list.append(
                        threatingestor.artifacts.Domain(
                            url_artifact.domain(),
                            self.name,
                            reference_link=reference_link,
                            reference_text=reference_text))

        # IP addresses, skipping private/loopback/reserved and invalid ones.
        for ip in iocextract.extract_ips(content):
            ip_artifact = threatingestor.artifacts.IPAddress(
                ip,
                self.name,
                reference_link=reference_link,
                reference_text=reference_text)

            try:
                addr = ip_artifact.ipaddress()
                if addr.is_private or addr.is_loopback or addr.is_reserved:
                    # don't care
                    continue
            except ValueError:
                # invalid IP
                continue

            artifact_list.append(ip_artifact)

        # YARA rules.
        for rule in iocextract.extract_yara_rules(content):
            artifact_list.append(
                threatingestor.artifacts.YARASignature(
                    rule,
                    self.name,
                    reference_link=reference_link,
                    reference_text=reference_text))

        # Hashes.
        for hash_ in iocextract.extract_hashes(content):
            artifact_list.append(
                threatingestor.artifacts.Hash(
                    hash_,
                    self.name,
                    reference_link=reference_link,
                    reference_text=reference_text))

        # One generic manual task per processed element.
        title = "Manual Task: {u}".format(u=reference_link)
        description = 'URL: {u}\nTask autogenerated by ThreatIngestor from source: {s}'.format(
            s=self.name, u=reference_link)
        artifact_list.append(
            threatingestor.artifacts.Task(title,
                                          self.name,
                                          reference_link=reference_link,
                                          reference_text=description))

        return artifact_list
예제 #13
0
    def start(self):
        """Run the configured IOC extraction and/or feed import.

        Behaviour is selected entirely by instance flags (extract_all, ip,
        hash, domain, extract_file, extract_url, feed). Extracted IOCs are
        de-duplicated against the local database and uploaded via
        self.uploadIOC().
        """
        self.logging()

        # Extraction
        # NOTE(review): self.extract_url is handled below but is missing
        # from this guard -- if ONLY extract_url is set, the whole
        # extraction branch is skipped. Confirm whether that is intended.
        if self.extract_all is not None \
         or self.ip is not None \
         or self.hash is not None \
         or self.domain is not None \
         or self.extract_file is not None:
            self.logger.info(
                'Checking the type of extraction will be performed.')

            # Source selection: read raw text from a local file or from a
            # rendered web page.
            if self.extract_file is not None:
                self.logger.info(
                    f'Obtaining IOC from file: {self.extract_file}')
                if os.path.exists(self.extract_file):
                    openfile = open(self.extract_file, 'r+')
                    all_text = openfile.read()
                    title = self.extract_file
                    file_name = self.extract_file
                else:
                    # NOTE(review): when the file is missing, all_text /
                    # title / file_name stay unbound and the extraction
                    # below will raise NameError.
                    self.logger.error(
                        'The given directory or file was not found.')
            elif self.extract_url is not None:
                self.logger.info(
                    f'Obtaining IOC from WebSite: {self.extract_url}')
                self.driver.get(self.extract_url)
                soup = BeautifulSoup(self.driver.page_source, "html.parser")
                title = soup.find('title').get_text()
                all_text = self.select_all_text(soup=soup)
                file_name = self.extract_url

            # Each mode below: skip values containing '/' or '[at]', refang
            # '[.]' to '.', store previously-unseen IOCs, and upload them.
            if self.extract_all:
                self.driver.get(self.baseurl)
                count = 0
                for extract_iocs in iocextract.extract_iocs(all_text):
                    if '/' not in extract_iocs \
                     and '[at]' not in extract_iocs:
                        if len(
                                self.database.compare_ioc(
                                    IOC=extract_iocs.replace('[.]',
                                                             '.'))) == 0:
                            self.database.save_ioc(IOC=extract_iocs.replace(
                                '[.]', '.'),
                                                   signature=title,
                                                   tags="Extract from URL",
                                                   font="Extract",
                                                   type="IOCS",
                                                   file_name=file_name)

                            self.uploadIOC(
                                comment=f'IOC extraction: {title}',
                                IOC=extract_iocs.replace('[.]', '.'),
                                count=count,
                                name=extract_iocs.replace('[.]', '.'))
                            count += 1
                        else:
                            self.logger.debug(
                                f'IOC already registered: {extract_iocs}')
            elif self.domain:
                self.driver.get(self.baseurl)
                count = 0
                for extract_urls in iocextract.extract_urls(all_text):
                    if '/' not in extract_urls \
                     and '[at]' not in extract_urls:
                        if len(
                                self.database.compare_ioc(
                                    IOC=extract_urls.replace('[.]',
                                                             '.'))) == 0:
                            self.database.save_ioc(IOC=extract_urls.replace(
                                '[.]', '.'),
                                                   signature=title,
                                                   tags="Extract from URL",
                                                   font="Extract",
                                                   type="Domain",
                                                   file_name=file_name)

                            self.uploadIOC(
                                comment=f'IOC extraction: {title}',
                                IOC=extract_urls.replace('[.]', '.'),
                                count=count,
                                name=extract_urls.replace('[.]', '.'))
                            count += 1
                        else:
                            self.logger.debug(
                                f'IOC already registered: {extract_urls}')
            elif self.ip:
                self.driver.get(self.baseurl)
                count = 0
                for extract_ipv4s in iocextract.extract_ipv4s(all_text):
                    if '/' not in extract_ipv4s \
                     and '[at]' not in extract_ipv4s:
                        if len(
                                self.database.compare_ioc(
                                    IOC=extract_ipv4s.replace('[.]',
                                                              '.'))) == 0:
                            self.database.save_ioc(IOC=extract_ipv4s.replace(
                                '[.]', '.'),
                                                   signature=title,
                                                   tags="Extract from URL",
                                                   font="Extract",
                                                   type="ipv4",
                                                   file_name=file_name)

                            self.uploadIOC(
                                comment=f'IOC extraction: {title}',
                                IOC=extract_ipv4s.replace('[.]', '.'),
                                count=count,
                                name=extract_ipv4s.replace('[.]', '.'))
                            count += 1
                        else:
                            self.logger.debug(
                                f'IOC already registered: {extract_ipv4s}')
            elif self.hash:
                self.logger.info('Getting only the Hashes from the site.')
                self.driver.get(self.baseurl)
                count = 0
                for extract_hashes in iocextract.extract_hashes(all_text):
                    if '/' not in extract_hashes \
                     and '[at]' not in extract_hashes:
                        if len(
                                self.database.compare_ioc(
                                    IOC=extract_hashes.replace('[.]',
                                                               '.'))) == 0:
                            self.database.save_ioc(IOC=extract_hashes.replace(
                                '[.]', '.'),
                                                   signature=title,
                                                   tags="Extract from URL",
                                                   font="Extract",
                                                   type="Hash",
                                                   file_name=file_name)

                            self.uploadIOC(
                                comment=f'IOC extraction: {title}',
                                IOC=extract_hashes.replace('[.]', '.'),
                                count=count,
                                name=extract_hashes.replace('[.]', '.'))
                            count += 1
                        else:
                            self.logger.debug(
                                f'IOC already registered: {extract_hashes}')

        if self.feed is not None:
            # MalwareBaazar: import SHA256 hashes from the Bazaar feed.
            count = 0
            for iocs in MalwareBaazar().start:
                if len(self.database.compare_ioc(
                        IOC=iocs['sha256_hash'])) == 0:
                    comment = "Name: {name}, signature: {signature}, tags: {tags}, font: {font}".format(
                        name=iocs['file_name'],
                        signature=iocs['signature'],
                        tags=iocs['tags'],
                        font='Bazaar')

                    self.database.save_ioc(file_name=iocs['file_name'],
                     IOC=iocs['sha256_hash'],
                     signature=iocs['signature'],
                     tags=str(iocs['tags']).replace("'",'') \
                      .replace('[','') \
                      .replace(']',''),
                     font='Bazaar',
                     type="Hash")

                    self.uploadIOC(comment=comment,
                                   IOC=iocs['sha256_hash'],
                                   count=count,
                                   name=iocs['file_name'])
                    count += 1
                else:
                    self.logger.debug(
                        f"IOC already registered: {iocs['sha256_hash']}")

            # Circl: walk the OSINT MISP feed and import hash attributes.
            for feed in MISPFeed(
                    url="https://www.circl.lu/doc/misp/feed-osint/").start:
                request = requests.get(feed,
                                       headers={
                                           'User-Agent': 'Mozilla/5.0'
                                       }).json()

                count = 0
                for iocs in request['Event']['Attribute']:
                    if iocs['category'] == 'Payload delivery':
                        # NOTE(review): `and` binds tighter than `or`, so a
                        # 64-char value passes even when it contains '.';
                        # confirm whether parentheses were intended here.
                        if '.' not in iocs['value'] \
                         and len(iocs['value']) == 32 \
                         or len(iocs['value']) == 64:

                            if len(self.database.compare_ioc(
                                    IOC=iocs['value'])) == 0:
                                comment = "Name: {name}, signature: {signature}, tags: {tags}, font: {font}".format(
                                    name=iocs['comment'].split(' ')[0],
                                    signature=iocs['category'],
                                    tags=iocs['category'],
                                    font="Circl")

                                self.database.save_ioc(
                                    file_name=iocs['comment'].split(' ')[0],
                                    IOC=iocs['value'],
                                    signature=iocs['category'],
                                    tags=iocs['category'],
                                    font="Circl",
                                    type="Hash")

                                self.uploadIOC(
                                    comment=comment,
                                    IOC=iocs['value'],
                                    count=count,
                                    name=iocs['comment'].split(' ')[0])
                                count += 1
                            else:
                                self.logger.debug(
                                    f"IOC already registered: {iocs['value']}")

                    elif iocs['category'] == 'External analysis':
                        # VirusTotal links embed the hash as the 5th path
                        # segment.
                        if 'virustotal' in iocs['value']:
                            hash = iocs['value'].split('/')[4]
                            if len(self.database.compare_ioc(IOC=hash)) == 0:
                                comment = "Name: {name}, signature: {signature}, tags: {tags}, font: {font}".format(
                                    name=iocs['comment'].split(' ')[0],
                                    signature=iocs['category'],
                                    tags=iocs['category'],
                                    font="Circl")

                                self.database.save_ioc(
                                    file_name=iocs['comment'].split(' ')[0],
                                    IOC=hash,
                                    signature=iocs['category'],
                                    tags=iocs['category'],
                                    font="Circl",
                                    type="Hash")

                                # NOTE(review): save_ioc stored `hash` but
                                # the upload sends the full iocs['value']
                                # URL -- confirm which was intended.
                                self.uploadIOC(
                                    comment=comment,
                                    IOC=iocs['value'],
                                    count=count,
                                    name=iocs['comment'].split(' ')[0])
                                count += 1
                            else:
                                self.logger.debug(
                                    f"IOC already registered: {iocs['value']}")

                    elif iocs['category'] == 'Artifacts dropped':
                        hash = iocs['value']
                        if len(self.database.compare_ioc(IOC=hash)) == 0:
                            comment = "Name: {name}, signature: {signature}, tags: {tags}, font: {font}".format(
                                name=iocs['comment'].split(' ')[0],
                                signature=iocs['category'],
                                tags=iocs['category'],
                                font="Circl")

                            self.database.save_ioc(
                                file_name=iocs['comment'].split(' ')[0],
                                IOC=hash,
                                signature=iocs['category'],
                                tags=iocs['category'],
                                font="Circl",
                                type="Hash")

                            self.uploadIOC(comment=comment,
                                           IOC=iocs['value'],
                                           count=count,
                                           name=iocs['comment'].split(' ')[0])
                            count += 1
                        else:
                            self.logger.debug(
                                f'IOC already registered: {hash}')
예제 #14
0
def get_hash(content):
    """Extract all hash-like strings (MD5/SHA1/SHA256/...) from *content*.

    :param content: Text to scan for hashes.
    :return: List of extracted hash strings (possibly empty).
    """
    # Bug fix: the original returned ``array_hashes.append(hash)``, which is
    # always None (list.append returns None) and referenced the undefined
    # name ``hash`` (shadowing the builtin). Return the collected list.
    return list(iocextract.extract_hashes(content))
    def _parse_indicators_from_stix_description(self, xml_content):
        """Parse IOCs out of the free-text description of a STIX indicator.

        Only the first indicator in the package is examined.

        :param xml_content: STIX XML content accepted by
            ``STIXPackage.from_xml`` (file-like object or path).
        :return: dict of IOC lists keyed by type:
            title / domain / ip / email / hash / url / yara / other.
        """
        # Bug fix: the original dict literal listed 'hash' twice; duplicate
        # keys in a dict literal silently overwrite each other.
        iocs = {
            'title': '',
            'domain': [],
            'ip': [],
            'email': [],
            'hash': [],
            'url': [],
            'yara': [],
            'other': []
        }
        # Common "defang" obfuscations mapped to their real equivalents.
        on9strings = {'[.]': '.', 'hxxp': 'http', '[@]': '@'}

        # Parse input into a STIXPackage, then into a plain Python dict.
        stix_package = STIXPackage.from_xml(xml_content)
        stix_dict = stix_package.to_dict()

        # Extract description/title from the first indicator only.
        description = stix_dict["indicators"][0]["description"]
        title = stix_dict["indicators"][0]["title"]
        iocs['title'] = [title]

        # Pattern catching defanged domain / email / IP candidates,
        # e.g. "example[.]com" or "user[@]host[.]net".
        raw_iocs = re.findall(
            r'[a-zA-Z0-9-\.]*\[\.?\@?\][a-zA-Z0-9-\.\[\.\@\]]*[-a-zA-Z0-9@:%_\+.~#?&//=]*',
            description)

        for i in range(len(raw_iocs)):
            # Refang: replace each obfuscated token with its real form.
            for on9string in on9strings:
                raw_iocs[i] = raw_iocs[i].replace(on9string,
                                                  on9strings[on9string])

            # Classify the refanged candidate.
            if re.match(r'.*[@]+', raw_iocs[i]):
                iocs['email'].append(raw_iocs[i])
                iocs['email'] = list(set(iocs['email']))

            elif re.match(r'.*[//].*', raw_iocs[i]):
                iocs['url'].append(raw_iocs[i])
                iocs['url'] = list(set(iocs['url']))

            elif re.match(r'.*[a-zA-Z]', raw_iocs[i]):
                if re.match(r"^([a-z0-9]+(-[a-z0-9]+)*\.)+[a-z]{2,}$",
                            raw_iocs[i]):
                    iocs['domain'].append(raw_iocs[i])
                    iocs['domain'] = list(set(iocs['domain']))

        # Extract hashes via iocextract; dedup once after the loop
        # (equivalent to the original per-iteration dedup).
        for hash_extracted in iocextract.extract_hashes(description):
            iocs['hash'].append(hash_extracted)
        iocs['hash'] = list(set(iocs['hash']))

        # Extract YARA rules.
        for yara_extracted in iocextract.extract_yara_rules(description):
            iocs['yara'].append(yara_extracted)
        iocs['yara'] = list(set(iocs['yara']))

        # Extract IPs, validating dotted-quad shape with a regex.
        for ip_extracted in iocextract.extract_ips(description, refang=True):
            if re.match(r"\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b",
                        ip_extracted):
                iocs['ip'].append(ip_extracted)
        iocs['ip'] = list(set(iocs['ip']))

        return iocs
예제 #16
0
                    help="File from where to extract the hashes",
                    required=True)
import time

args = parser.parse_args()

# VirusTotal v3 file-report endpoint; the file hash is appended per request.
url = "https://www.virustotal.com/api/v3/files/"

hash_list = []

# Pull every hash-like token out of the input file.
with open(args.file, 'r') as f:
    string = f.read()

    for hash_extracted in iocextract.extract_hashes(string):
        hash_list.append(hash_extracted)

# De-duplicate while preserving first-seen order.
hash_list = list(dict.fromkeys(hash_list))
print("list size: " + str(len(hash_list)))

for hash in hash_list:
    print("hash: " + hash)
    headers = {"x-apikey": args.apikey}

    # Retry until VT answers definitively: 200 (report found) or
    # 404 (hash unknown). Any other status (typically 429, quota
    # exceeded) is retried after a pause — the original busy-looped
    # against the API with no delay, hammering the rate limiter.
    while True:
        response = requests.get(url + hash, headers=headers)

        if response.status_code == 200 or response.status_code == 404:
            break
        time.sleep(15)
예제 #17
0
    def process_element(self,
                        content,
                        reference_link,
                        include_nonobfuscated=False):
        """Take a single source content/url and return a list of Artifacts.

        This is the main work block of Source plugins, which handles
        IOC extraction and artifact creation.

        :param content: String content to extract from.
        :param reference_link: Reference link to attach to all artifacts.
        :param include_nonobfuscated: Include non-defanged URLs in output?
        :return: List of Artifact objects (URL, Domain, IPAddress,
            YARASignature, Hash), always ending with one generic Task.
        """
        logger.debug(f"Processing in source '{self.name}'")

        # Truncate content to a reasonable length for reference_text.
        reference_text = content[:TRUNCATE_LENGTH] + (
            '...' if len(content) > TRUNCATE_LENGTH else '')

        # Initialize an empty list and a map of counters to track each artifact type.
        artifact_list = []
        artifact_type_count = {
            'domain': 0,
            'hash': 0,
            'ipaddress': 0,
            'task': 0,
            'url': 0,
            'yarasignature': 0,
        }

        # Collect URLs and domains.
        scraped = itertools.chain(
            iocextract.extract_unencoded_urls(content),
            # Decode encoded URLs, since we can't operate on encoded ones.
            iocextract.extract_encoded_urls(content, refang=True),
        )
        for url in scraped:
            # Dump anything with ellipses, these get through the regex.
            # (An ellipsis means the source truncated the URL mid-string.)
            if u'\u2026' in url:
                continue

            artifact = threatingestor.artifacts.URL(
                url,
                self.name,
                reference_link=reference_link,
                reference_text=reference_text)

            # Dump URLs that appear to have the same domain as reference_url.
            try:
                if artifact.domain() == urlparse(reference_link).netloc:
                    continue
            except ValueError:
                # Error parsing reference_link as a URL. Ignoring.
                pass

            # Non-obfuscated URLs are only kept when explicitly requested.
            if artifact.is_obfuscated() or include_nonobfuscated:
                # Do URL collection.
                artifact_list.append(artifact)
                artifact_type_count['url'] += 1

                # Do domain collection in the same pass.
                # Note: domains will always be a subset of URLs. There is no
                # domain extraction.
                if artifact.is_domain():
                    artifact_list.append(
                        threatingestor.artifacts.Domain(
                            artifact.domain(),
                            self.name,
                            reference_link=reference_link,
                            reference_text=reference_text))
                    artifact_type_count['domain'] += 1

        # Collect IPs.
        scraped = iocextract.extract_ips(content)
        for ip in scraped:
            artifact = threatingestor.artifacts.IPAddress(
                ip,
                self.name,
                reference_link=reference_link,
                reference_text=reference_text)

            try:
                ipaddress = artifact.ipaddress()
                if ipaddress.is_private or ipaddress.is_loopback or ipaddress.is_reserved:
                    # Skip private, loopback, reserved IPs.
                    continue

            except ValueError:
                # Skip invalid IPs.
                continue

            artifact_list.append(artifact)
            artifact_type_count['ipaddress'] += 1

        # Collect YARA rules.
        scraped = iocextract.extract_yara_rules(content)
        for rule in scraped:
            artifact = threatingestor.artifacts.YARASignature(
                rule,
                self.name,
                reference_link=reference_link,
                reference_text=reference_text)

            artifact_list.append(artifact)
            artifact_type_count['yarasignature'] += 1

        # Collect hashes.
        scraped = iocextract.extract_hashes(content)
        for hash_ in scraped:
            artifact = threatingestor.artifacts.Hash(
                hash_,
                self.name,
                reference_link=reference_link,
                reference_text=reference_text)

            artifact_list.append(artifact)
            artifact_type_count['hash'] += 1

        # Generate generic task. One Task artifact is always emitted per
        # element so downstream operators can track manual follow-up.
        title = f"Manual Task: {reference_link}"
        description = f"URL: {reference_link}\nTask autogenerated by ThreatIngestor from source: {self.name}"
        artifact = threatingestor.artifacts.Task(title,
                                                 self.name,
                                                 reference_link=reference_link,
                                                 reference_text=description)
        artifact_list.append(artifact)
        artifact_type_count['task'] += 1

        logger.debug(f"Found {len(artifact_list)} total artifacts")
        logger.debug(f"Type breakdown: {artifact_type_count}")
        return artifact_list