Example #1
    def compute(self, message):
        item = Item(message)
        content = item.get_content()
        # List of the regex results in the Item; may be empty
        results = self.REG_PHONE.findall(content)

        # If more than 4 numbers are found, we consider that the Item may contain a list of phone numbers
        if len(results) > 4:
            self.redis_logger.debug(results)
            self.redis_logger.warning(
                f'{item.get_id()} contains PID (phone numbers)')

            msg = f'infoleak:automatic-detection="phone-number";{item.get_id()}'
            self.send_message_to_queue(msg, 'Tags')

            # Send to duplicate
            self.send_message_to_queue(item.get_id(), 'Duplicate')

            stats = {}
            for phone_number in results:
                try:
                    x = phonenumbers.parse(phone_number, None)
                    country_code = x.country_code
                    stats[country_code] = stats.get(country_code, 0) + 1
                except phonenumbers.NumberParseException:
                    # Skip matches that are not parseable phone numbers
                    pass
            for country_code in stats:
                if stats[country_code] > 4:
                    self.redis_logger.warning(
                        f'{item.get_id()} contains Phone numbers with country code {country_code}'
                    )
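
For reference, the country-code tally above can be reproduced standalone; a minimal sketch using collections.Counter and the same phonenumbers API (parse raises NumberParseException on junk matches; the sample numbers are made up):

from collections import Counter

import phonenumbers

def count_country_codes(candidates):
    # Tally parseable phone numbers by country calling code
    stats = Counter()
    for raw in candidates:
        try:
            parsed = phonenumbers.parse(raw, None)
            stats[parsed.country_code] += 1
        except phonenumbers.NumberParseException:
            continue  # regex match that is not a parseable number
    return stats

# Counter({33: 2, 1: 1})
print(count_country_codes(['+33 6 12 34 56 78', '+33612345679', '+1 202 555 0100']))
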
Example #2
    def compute(self, message, r_result=False):
        id, score = message.split()
        item = Item(id)
        content = item.get_content()
        all_cards = re.findall(self.regex, content)

        if all_cards:
            #self.redis_logger.debug(f'All matching {all_cards}')
            creditcard_set = set()

            for card in all_cards:
                clean_card = re.sub('[^0-9]', '', card)
                if lib_refine.is_luhn_valid(clean_card):
                    self.redis_logger.debug(f'{clean_card} is valid')
                    creditcard_set.add(clean_card)

            #pprint.pprint(creditcard_set)
            to_print = f'CreditCard;{item.get_source()};{item.get_date()};{item.get_basename()};'
            if creditcard_set:
                self.redis_logger.warning(
                    f'{to_print}Checked {len(creditcard_set)} valid number(s);{item.get_id()}'
                )

                #Send to duplicate
                self.send_message_to_queue(item.get_id(), 'Duplicate')

                msg = f'infoleak:automatic-detection="credit-card";{item.get_id()}'
                self.send_message_to_queue(msg, 'Tags')

                if r_result:
                    return creditcard_set
            else:
                self.redis_logger.info(
                    f'{to_print}CreditCard related;{item.get_id()}')
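
lib_refine.is_luhn_valid is not shown here; assuming it implements the standard Luhn mod-10 check over a digits-only string, a minimal equivalent sketch:

def is_luhn_valid(card_number):
    # Standard Luhn mod-10 checksum over a digits-only string
    checksum = 0
    for i, digit in enumerate(int(d) for d in reversed(card_number)):
        if i % 2 == 1:
            digit *= 2          # double every second digit from the right
            if digit > 9:
                digit -= 9
        checksum += digit
    return checksum % 10 == 0

assert is_luhn_valid('4111111111111111')        # classic Visa test number
assert not is_luhn_valid('4111111111111112')
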
Example #3
    def compute(self, message):
        url, id = message.split()

        if self.is_sql_injection(url):
            self.faup.decode(url)
            url_parsed = self.faup.get()

            item = Item(id)
            item_id = item.get_id()
            print(f"Detected SQL in URL: {item_id}")
            print(urllib.parse.unquote(url))
            to_print = f'SQLInjection;{item.get_source()};{item.get_date()};{item.get_basename()};Detected SQL in URL;{item_id}'
            self.redis_logger.warning(to_print)

            # Send to duplicate
            self.send_message_to_queue(item_id, 'Duplicate')

            # Tag
            msg = f'infoleak:automatic-detection="sql-injection";{item_id}'
            self.send_message_to_queue(msg, 'Tags')

            # statistics
            tld = url_parsed['tld']
            if tld is not None:
                ## TODO: # FIXME: remove me
                try:
                    tld = tld.decode()
                except AttributeError:
                    pass  # already a str on recent faup versions
                date = datetime.now().strftime("%Y%m")
                self.server_statistics.hincrby(f'SQLInjection_by_tld:{date}', tld, 1)
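
is_sql_injection is defined elsewhere; a plausible keyword-scoring sketch (an assumption only, with a placeholder keyword list and threshold):

from urllib.parse import unquote

SQL_KEYWORDS = ('select', 'union', 'insert into', 'drop table', 'or 1=1', '--', ';')

def is_sql_injection(url):
    # Count suspicious SQL tokens in the decoded URL
    decoded = unquote(url).lower()
    score = sum(decoded.count(keyword) for keyword in SQL_KEYWORDS)
    return score >= 2

print(is_sql_injection("http://x.example/?id=1'%20UNION%20SELECT%20--"))  # True
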
Example #4
    def compute(self, message, r_result=False):
        # Create Item Object
        item = Item(message)
        # Get item content
        content = item.get_content()
        categ_found = []

        # Search for pattern categories in item content
        for categ, pattern in self.categ_words:

            found = set(re.findall(pattern, content))
            lenfound = len(found)
            if lenfound >= self.matchingThreshold:
                categ_found.append(categ)
                msg = f'{item.get_id()} {lenfound}'

                # Export message to categ queue
                print(msg, categ)
                self.send_message_to_queue(msg, categ)

                self.redis_logger.info(
                    f'Categ;{item.get_source()};{item.get_date()};{item.get_basename()};Detected {lenfound} as {categ};{item.get_id()}'
                )
        if r_result:
            return categ_found
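
self.categ_words is consumed as (category, pattern) pairs; its real contents live in the module's setup, but a plausible illustrative shape (an assumption) is:

import re

categ_words = [
    ('Mail', re.compile(r'[\w.+-]+@[\w-]+\.[\w.]+')),
    ('CreditCards', re.compile(r'(?i)visa|mastercard|cvv')),
]

content = 'contact alice@example.com or bob@example.org'
for categ, pattern in categ_words:
    found = set(re.findall(pattern, content))
    if found:
        print(categ, len(found))  # Mail 2
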
Example #5
    def compute(self, message):
        """
        Search for Web links from given message
        """
        # Extract item
        id, score = message.split()

        item = Item(id)
        item_content = item.get_content()

        l_urls = regex_helper.regex_findall(self.module_name,
                                            self.redis_cache_key,
                                            self.url_regex, item.get_id(),
                                            item_content)
        for url in l_urls:
            self.faup.decode(url)
            unpack_url = self.faup.get()  # parsed URL fields (unused below)

            to_send = f"{url} {item.get_id()}"
            print(to_send)
            self.send_message_to_queue(to_send, 'Url')
            self.redis_logger.debug(f"url_parsed: {to_send}")

        if l_urls:
            to_print = f'Urls;{item.get_source()};{item.get_date()};{item.get_basename()};'
            self.redis_logger.info(
                f'{to_print}Detected {len(l_urls)} URL;{item.get_id()}')
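
self.faup here (and in the other modules) is a Faup URL parser instance; a standalone usage sketch, assuming the pyfaup bindings AIL ships with:

from pyfaup.faup import Faup

faup = Faup()
faup.decode('https://sub.example.com/path?q=1')
parsed = faup.get()
# Depending on the faup version, field values may be bytes or str --
# hence the try/except .decode() shims scattered through these modules.
print(parsed['domain'], parsed['tld'], parsed['resource_path'])
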
Example #6
    def compute(self, message):
        url, id = message.split()

        self.faup.decode(url)
        url_parsed = self.faup.get()
        ## TODO: # FIXME: remove me
        try:
            resource_path = url_parsed['resource_path'].encode()
        except AttributeError:
            # Already bytes (or None), depending on the faup version
            resource_path = url_parsed['resource_path']

        ## TODO: # FIXME: remove me
        try:
            query_string = url_parsed['query_string'].encode()
        except AttributeError:
            query_string = url_parsed['query_string']

        result_path = {'sqli' : False}
        result_query = {'sqli' : False}

        if resource_path is not None:
            result_path = pylibinjection.detect_sqli(resource_path)
            #print(f'path is sqli : {result_path}')

        if query_string is not None:
            result_query = pylibinjection.detect_sqli(query_string)
            #print(f'query is sqli : {result_query}')

        if result_path['sqli'] is True or result_query['sqli'] is True:
            item = Item(id)
            item_id = item.get_id()
            print(f"Detected (libinjection) SQL in URL: {item_id}")
            print(urllib.parse.unquote(url))

            to_print = f'LibInjection;{item.get_source()};{item.get_date()};{item.get_basename()};Detected SQL in URL;{item_id}'
            self.redis_logger.warning(to_print)

            # Send to duplicate
            self.send_message_to_queue(item_id, 'Duplicate')

            # Add tag
            msg = f'infoleak:automatic-detection="sql-injection";{item_id}'
            self.send_message_to_queue(msg, 'Tags')

            #statistics
            ## TODO: # FIXME: remove me
            try:
                tld = url_parsed['tld'].decode()
            except AttributeError:
                tld = url_parsed['tld']
            if tld is not None:
                date = datetime.now().strftime("%Y%m")
                self.server_statistics.hincrby(f'SQLInjection_by_tld:{date}', tld, 1)
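
detect_sqli is the only pylibinjection call these modules rely on: it takes bytes and returns a dict whose 'sqli' key flags a hit. Standalone:

import pylibinjection

result = pylibinjection.detect_sqli(b"1' OR '1'='1")
print(result['sqli'])  # expected to flag this classic payload
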
Example #7
    def compute(self, message):
        # list of tuples: (url, subdomains, domain)
        urls_to_crawl = []

        id, score = message.split()
        item = Item(id)
        item_content = item.get_content()

        # max execution time on regex
        res = regex_helper.regex_findall(self.module_name,
                                         self.redis_cache_key, self.url_regex,
                                         item.get_id(), item_content)
        for x in res:
            # The serialized regex match is a stringified tuple:
            # strip the outer quotes/parens and split back into fields
            x = x[2:-2].replace(" '", "").split("',")
            url = x[0]
            subdomain = x[4].lower()
            self.faup.decode(url)
            url_unpack = self.faup.get()
            try:  ## TODO: # FIXME: check faup version
                domain = url_unpack['domain'].decode().lower()
            except AttributeError:
                domain = url_unpack['domain'].lower()

            if crawlers.is_valid_onion_domain(domain):
                urls_to_crawl.append((url, subdomain, domain))

        to_print = f'Onion;{item.get_source()};{item.get_date()};{item.get_basename()};'
        if not urls_to_crawl:
            self.redis_logger.info(f'{to_print}Onion related;{item.get_id()}')
            return

        # TAG Item
        msg = f'infoleak:automatic-detection="onion";{item.get_id()}'
        self.send_message_to_queue(msg, 'Tags')

        if crawlers.is_crawler_activated():
            for to_crawl in urls_to_crawl:
                print(f'{to_crawl[2]} added to crawler queue: {to_crawl[0]}')
                crawlers.add_item_to_discovery_queue('onion', to_crawl[2],
                                                     to_crawl[1], to_crawl[0],
                                                     item.get_id())
        else:
            print(
                f'{to_print}Detected {len(urls_to_crawl)} .onion(s);{item.get_id()}'
            )
            self.redis_logger.warning(
                f'{to_print}Detected {len(urls_to_crawl)} .onion(s);{item.get_id()}'
            )
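
crawlers.is_valid_onion_domain is not shown; a plausible sketch (an assumption) accepting v2 (16-char) and v3 (56-char) base32 onion names:

import re

ONION_DOMAIN_RE = re.compile(r'^(?:[a-z2-7]{16}|[a-z2-7]{56})\.onion$')

def is_valid_onion_domain(domain):
    return bool(ONION_DOMAIN_RE.match(domain))

print(is_valid_onion_domain('expyuzz4wqqyqhjn.onion'))  # True (old v2 address)
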
Example #8
    def compute(self, message, r_result=False):
        id, score = message.split()
        item = Item(id)
        item_content = item.get_content()

        google_api_key = regex_helper.regex_findall(self.module_name,
                                                    self.redis_cache_key,
                                                    self.re_google_api_key,
                                                    item.get_id(),
                                                    item_content)

        aws_access_key = regex_helper.regex_findall(self.module_name,
                                                    self.redis_cache_key,
                                                    self.re_aws_access_key,
                                                    item.get_id(),
                                                    item_content)
        # Default so the r_result return below cannot hit a NameError
        aws_secret_key = []
        if aws_access_key:
            aws_secret_key = regex_helper.regex_findall(
                self.module_name, self.redis_cache_key, self.re_aws_secret_key,
                item.get_id(), item_content)

        if aws_access_key or google_api_key:
            to_print = f'ApiKey;{item.get_source()};{item.get_date()};{item.get_basename()};'

            if google_api_key:
                print(f'found google api key: {to_print}')
                self.redis_logger.warning(
                    f'{to_print}Checked {len(google_api_key)} found Google API Key;{item.get_id()}'
                )

                msg = f'infoleak:automatic-detection="google-api-key";{item.get_id()}'
                self.send_message_to_queue(msg, 'Tags')

            # # TODO: # FIXME: AWS regex/validate/sanitize KEY + SECRET KEY
            if aws_access_key:
                print(f'found AWS key: {to_print}')
                self.redis_logger.warning(
                    f'{to_print}Checked {len(aws_access_key)} found AWS Key;{item.get_id()}'
                )
                if aws_secret_key:
                    print('found AWS secret key')
                    self.redis_logger.warning(
                        f'{to_print}Checked {len(aws_secret_key)} found AWS secret Key;{item.get_id()}'
                    )

                msg = f'infoleak:automatic-detection="aws-key";{item.get_id()}'
                self.send_message_to_queue(msg, 'Tags')

            # Tags
            msg = f'infoleak:automatic-detection="api-key";{item.get_id()}'
            self.send_message_to_queue(msg, 'Tags')

            # Send to duplicate
            self.send_message_to_queue(item.get_id(), 'Duplicate')

            if r_result:
                return (google_api_key, aws_access_key, aws_secret_key)
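
The key regexes (self.re_google_api_key, self.re_aws_access_key) are defined elsewhere; commonly used patterns for these token formats (assumptions, not necessarily AIL's exact expressions) look like:

import re

re_google_api_key = re.compile(r'AIza[0-9A-Za-z_-]{35}')
re_aws_access_key = re.compile(r'AKIA[0-9A-Z]{16}')

sample = 'aws=AKIAIOSFODNN7EXAMPLE google=AIza' + 35 * 'A'
print(re_aws_access_key.findall(sample))   # ['AKIAIOSFODNN7EXAMPLE']
print(re_google_api_key.findall(sample))   # one 39-char match
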
Example #9
    def compute(self, item_id):
        # refresh Tracked regex
        if self.last_refresh < Tracker.get_tracker_last_updated_by_type('regex'):
            self.dict_regex_tracked = Term.get_regex_tracked_words_dict()
            self.last_refresh = time.time()
            self.redis_logger.debug('Tracked regex refreshed')
            print('Tracked regex refreshed')

        item = Item(item_id)
        item_id = item.get_id()
        item_content = item.get_content()

        for regex in self.dict_regex_tracked:
            matched = regex_helper.regex_search(self.module_name, self.redis_cache_key, self.dict_regex_tracked[regex], item_id, item_content, max_time=self.max_execution_time)
            if matched:
                self.new_tracker_found(regex, 'regex', item)
Example #10
    def compute(self, message):
        #  Extract item ID and tag from message
        mess_split = message.split(';')
        if len(mess_split) == 2:
            tag = mess_split[0]
            item = Item(mess_split[1])
            item_id = item.get_id()

            # Create a new tag
            Tag.add_tag('item', tag, item.get_id())
            print(f'{item_id}: Tagged {tag}')

            # Forward message to channel
            self.send_message_to_queue(message, 'MISP_The_Hive_feeder')
        else:
            # Malformed message
            raise Exception(
                f'Malformed message: expected 2 values (tag;item_id), '
                f'got {len(mess_split)}: {message}'
            )
Example #11
    def compute(self, item_id):
        # refresh Tracked term
        if self.last_refresh_word < Term.get_tracked_term_last_updated_by_type(
                'word'):
            self.list_tracked_words = Term.get_tracked_words_list()
            self.last_refresh_word = time.time()
            self.redis_logger.debug('Tracked word refreshed')
            print('Tracked word refreshed')

        if self.last_refresh_set < Term.get_tracked_term_last_updated_by_type(
                'set'):
            self.set_tracked_words_list = Term.get_set_tracked_words_list()
            self.last_refresh_set = time.time()
            self.redis_logger.debug('Tracked set refreshed')
            print('Tracked set refreshed')

        # Wrap the item id in an Item object
        item = Item(item_id)
        item_date = item.get_date()
        item_content = item.get_content()

        signal.alarm(self.max_execution_time)

        dict_words_freq = None
        try:
            dict_words_freq = Term.get_text_word_frequency(item_content)
        except TimeoutException:
            self.redis_logger.warning(f"{item.get_id()} processing timeout")
        else:
            signal.alarm(0)

        if dict_words_freq:
            # create token statistics
            # for word in dict_words_freq:
            #    Term.create_token_statistics(item_date, word, dict_words_freq[word])
            item_source = item.get_source()

            # check solo words
            ####### # TODO: check if source needed #######
            for word in self.list_tracked_words:
                if word in dict_words_freq:
                    self.new_term_found(word, 'word', item.get_id(),
                                        item_source)

            # check words set
            for elem in self.set_tracked_words_list:
                list_words = elem[0]
                nb_words_threshold = elem[1]
                word_set = elem[2]
                nb_uniq_word = 0

                for word in list_words:
                    if word in dict_words_freq:
                        nb_uniq_word += 1
                if nb_uniq_word >= nb_words_threshold:
                    self.new_term_found(word_set, 'set', item.get_id(),
                                        item_source)
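
The signal.alarm / TimeoutException dance above, as a self-contained sketch (Unix-only; the handler wiring normally happens in the module's setup, and the slow call here is a stand-in):

import signal

class TimeoutException(Exception):
    pass

def timeout_handler(signum, frame):
    raise TimeoutException()

signal.signal(signal.SIGALRM, timeout_handler)

def slow_word_frequency():
    return sum(range(10 ** 6))  # stand-in for the real frequency pass

signal.alarm(30)                # arm: SIGALRM fires after 30 seconds
try:
    result = slow_word_frequency()
except TimeoutException:
    result = None               # processing timed out
else:
    signal.alarm(0)             # disarm on success
print(result)
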
Example #12
    def compute(self, message):
        docpath = message.split(" ", -1)[-1]

        item = Item(message)
        item_id = item.get_id()
        item_content = item.get_content()

        self.redis_logger.debug(f"Indexing - {self.indexname}: {docpath}")
        print(f"Indexing - {self.indexname}: {docpath}")

        try:
            # Avoid calculating the index's size at each message
            if time.time() - self.last_refresh > self.TIME_WAIT:
                self.last_refresh = time.time()
                if self.check_index_size() >= self.INDEX_SIZE_THRESHOLD * (
                        1000 * 1000):
                    timestamp = int(time.time())
                    self.redis_logger.debug(f"Creating new index {timestamp}")
                    print(f"Creating new index {timestamp}")
                    self.indexpath = join(self.baseindexpath, str(timestamp))
                    self.indexname = str(timestamp)
                    # update all_index
                    with open(self.indexRegister_path, "a") as f:
                        f.write('\n' + str(timestamp))
                    # create new dir
                    os.mkdir(self.indexpath)
                    self.ix = create_in(self.indexpath, self.schema)

            if self.indexertype == "whoosh":
                indexwriter = self.ix.writer()
                indexwriter.update_document(title=docpath,
                                            path=docpath,
                                            content=item_content)
                indexwriter.commit()

        except IOError:
            self.redis_logger.debug(f"CRC Checksum Failed on: {item_id}")
            print(f"CRC Checksum Failed on: {item_id}")
            self.redis_logger.error(
                f'Duplicate;{item.get_source()};{item.get_date()};{item.get_basename()};CRC Checksum Failed'
            )
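
The whoosh calls above, standalone; the schema fields are assumptions inferred from the title/path/content arguments passed to update_document:

import tempfile

from whoosh.fields import ID, TEXT, Schema
from whoosh.index import create_in

schema = Schema(title=TEXT(stored=True),
                path=ID(stored=True, unique=True),
                content=TEXT)
ix = create_in(tempfile.mkdtemp(), schema)

writer = ix.writer()
# update_document replaces any existing document with the same unique path
writer.update_document(title='doc1', path='doc1', content='some indexed text')
writer.commit()
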
Example #13
    def new_term_found(self, term, term_type, item_id, item_source):
        uuid_list = Term.get_term_uuid_list(term, term_type)
        self.redis_logger.info(f'new tracked term found: {term} in {item_id}')
        print(f'new tracked term found: {term} in {item_id}')
        item_date = Item(item_id).get_date()
        for term_uuid in uuid_list:
            tracker_sources = Tracker.get_tracker_uuid_sources(term_uuid)
            if not tracker_sources or item_source in tracker_sources:
                Tracker.add_tracked_item(term_uuid, item_id)

                tags_to_add = Term.get_term_tags(term_uuid)
                for tag in tags_to_add:
                    msg = '{};{}'.format(tag, item_id)
                    self.send_message_to_queue(msg, 'Tags')

                mail_to_notify = Term.get_term_mails(term_uuid)
                if mail_to_notify:
                    mail_subject = Tracker.get_email_subject(term_uuid)
                    mail_body = Tracker_Term.mail_body_template.format(
                        term, item_id, self.full_item_url, item_id)
                    for mail in mail_to_notify:
                        self.redis_logger.debug(f'Send Mail {mail_subject}')
                        print(f'Send Mail {mail_subject}')
                        NotificationHelper.sendEmailNotification(
                            mail, mail_subject, mail_body)

                # Webhook
                webhook_to_post = Term.get_term_webhook(term_uuid)
                if webhook_to_post:
                    json_request = {
                        "trackerId": term_uuid,
                        "itemId": item_id,
                        "itemURL": self.full_item_url + item_id,
                        "term": term,
                        "itemSource": item_source,
                        "itemDate": item_date,
                        "tags": tags_to_add,
                        "emailNotification": f'{mail_to_notify}',
                        "trackerType": term_type
                    }
                    try:
                        response = requests.post(webhook_to_post,
                                                 json=json_request)
                        if response.status_code >= 400:
                            self.redis_logger.error(
                                f"Webhook request failed for {webhook_to_post}\nReason: {response.reason}"
                            )
                    except requests.exceptions.RequestException as e:
                        self.redis_logger.error(
                            f"Webhook request failed for {webhook_to_post}\nReason: {e}"
                        )
Example #14
    def compute(self, item_id):
        # refresh YARA list
        if self.last_refresh < Tracker.get_tracker_last_updated_by_type(
                'yara'):
            self.rules = Tracker.reload_yara_rules()
            self.last_refresh = time.time()
            self.redis_logger.debug('Tracked YARA rules refreshed')
            print('Tracked YARA rules refreshed')

        self.item = Item(item_id)
        item_content = self.item.get_content()
        try:
            yara_match = self.rules.match(
                data=item_content,
                callback=self.yara_rules_match,
                which_callbacks=yara.CALLBACK_MATCHES,
                timeout=60)
            if yara_match:
                self.redis_logger.info(f'{self.item.get_id()}: {yara_match}')
                print(f'{self.item.get_id()}: {yara_match}')
        except yara.TimeoutError:
            print(f'{self.item.get_id()}: yara scanning timed out')
            self.redis_logger.info(
                f'{self.item.get_id()}: yara scanning timed out')
Example #15
    def compute(self, message, r_result=False):
        item = Item(message)

        item_content = item.get_content()
        item_basename = item.get_basename()
        item_date = item.get_date()
        item_source = item.get_source()
        try:
            mimetype = item_basic.get_item_mimetype(item.get_id())

            if mimetype.split('/')[0] == "text":
                self.c.text(rawtext=item_content)
                self.c.potentialdomain()
                self.c.validdomain(passive_dns=True, extended=False)
                #self.redis_logger.debug(self.c.vdomain)

                if self.c.vdomain and d4.is_passive_dns_enabled():
                    for dns_record in self.c.vdomain:
                        self.send_message_to_queue(dns_record)

                localizeddomains = self.c.include(expression=self.cc_tld)
                if localizeddomains:
                    print(localizeddomains)
                    self.redis_logger.warning(
                        f"DomainC;{item_source};{item_date};{item_basename};Checked {localizeddomains} located in {self.cc_tld};{item.get_id()}"
                    )

                localizeddomains = self.c.localizedomain(cc=self.cc)
                if localizeddomains:
                    print(localizeddomains)
                    self.redis_logger.warning(
                        f"DomainC;{item_source};{item_date};{item_basename};Checked {localizeddomains} located in {self.cc};{item.get_id()}"
                    )

                if r_result:
                    return self.c.vdomain

        except IOError as err:
            self.redis_logger.error(
                f"Duplicate;{item_source};{item_date};{item_basename};CRC Checksum Failed"
            )
            raise Exception(f"CRC Checksum Failed on: {item.get_id()}")
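
self.c is a DomainClassifier extractor; a minimal sketch mirroring the calls this module makes (library path and constructor details are assumptions based on adulau's DomainClassifier):

from DomainClassifier.domainclassifier import Extract

c = Extract(rawtext='')
c.text(rawtext='reach us at www.example.com or ns1.example.org')
c.potentialdomain()
c.validdomain(passive_dns=False, extended=False)
print(c.vdomain)  # candidate domains that actually resolve
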
Example #16
    def compute(self, message):
        item = Item(message)
        content = item.get_content()

        find = False
        get_pgp_content = False

        if KeyEnum.PGP_MESSAGE.value in content:
            self.redis_logger.warning(
                f'{item.get_basename()} has a PGP enc message')

            msg = f'infoleak:automatic-detection="pgp-message";{item.get_id()}'
            self.send_message_to_queue(msg, 'Tags')
            get_pgp_content = True
            find = True

        if KeyEnum.PGP_PUBLIC_KEY_BLOCK.value in content:
            msg = f'infoleak:automatic-detection="pgp-public-key-block";{item.get_id()}'
            self.send_message_to_queue(msg, 'Tags')
            get_pgp_content = True

        if KeyEnum.PGP_SIGNATURE.value in content:
            msg = f'infoleak:automatic-detection="pgp-signature";{item.get_id()}'
            self.send_message_to_queue(msg, 'Tags')
            get_pgp_content = True

        if KeyEnum.CERTIFICATE.value in content:
            self.redis_logger.warning(
                f'{item.get_basename()} has a certificate message')

            msg = f'infoleak:automatic-detection="certificate";{item.get_id()}'
            self.send_message_to_queue(msg, 'Tags')
            find = True

        if KeyEnum.RSA_PRIVATE_KEY.value in content:
            self.redis_logger.warning(
                f'{item.get_basename()} has a RSA private key message')
            print('rsa private key message found')

            msg = f'infoleak:automatic-detection="rsa-private-key";{item.get_id()}'
            self.send_message_to_queue(msg, 'Tags')
            find = True

        if KeyEnum.PRIVATE_KEY.value in content:
            self.redis_logger.warning(
                f'{item.get_basename()} has a private key message')
            print('private key message found')

            msg = f'infoleak:automatic-detection="private-key";{item.get_id()}'
            self.send_message_to_queue(msg, 'Tags')
            find = True

        if KeyEnum.ENCRYPTED_PRIVATE_KEY.value in content:
            self.redis_logger.warning(
                f'{item.get_basename()} has an encrypted private key message')
            print('encrypted private key message found')

            msg = f'infoleak:automatic-detection="encrypted-private-key";{item.get_id()}'
            self.send_message_to_queue(msg, 'Tags')
            find = True

        if KeyEnum.OPENSSH_PRIVATE_KEY.value in content:
            self.redis_logger.warning(
                f'{item.get_basename()} has an openssh private key message')
            print('openssh private key message found')

            msg = f'infoleak:automatic-detection="private-ssh-key";{item.get_id()}'
            self.send_message_to_queue(msg, 'Tags')
            find = True

        if KeyEnum.SSH2_ENCRYPTED_PRIVATE_KEY.value in content:
            self.redis_logger.warning(
                f'{item.get_basename()} has an ssh2 private key message')
            print('SSH2 private key message found')

            msg = f'infoleak:automatic-detection="private-ssh-key";{item.get_id()}'
            self.send_message_to_queue(msg, 'Tags')
            find = True

        if KeyEnum.OPENVPN_STATIC_KEY_V1.value in content:
            self.redis_logger.warning(
                f'{item.get_basename()} has an OpenVPN static key message')
            print('OpenVPN Static key message found')

            msg = f'infoleak:automatic-detection="vpn-static-key";{item.get_id()}'
            self.send_message_to_queue(msg, 'Tags')
            find = True

        if KeyEnum.DSA_PRIVATE_KEY.value in content:
            self.redis_logger.warning(
                f'{item.get_basename()} has a dsa private key message')

            msg = f'infoleak:automatic-detection="dsa-private-key";{item.get_id()}'
            self.send_message_to_queue(msg, 'Tags')
            find = True

        if KeyEnum.EC_PRIVATE_KEY.value in content:
            self.redis_logger.warning(
                f'{item.get_basename()} has an ec private key message')

            msg = f'infoleak:automatic-detection="ec-private-key";{item.get_id()}'
            self.send_message_to_queue(msg, 'Tags')
            find = True

        if KeyEnum.PGP_PRIVATE_KEY_BLOCK.value in content:
            self.redis_logger.warning(
                f'{item.get_basename()} has a pgp private key block message')

            msg = f'infoleak:automatic-detection="pgp-private-key";{item.get_id()}'
            self.send_message_to_queue(msg, 'Tags')
            find = True

        if KeyEnum.PUBLIC_KEY.value in content:
            self.redis_logger.warning(
                f'{item.get_basename()} has a public key message')

            msg = f'infoleak:automatic-detection="public-key";{item.get_id()}'
            self.send_message_to_queue(msg, 'Tags')
            find = True

        # pgp content
        if get_pgp_content:
            self.send_message_to_queue(item.get_id(), 'PgpDump')

        if find:
            #Send to duplicate
            self.send_message_to_queue(item.get_id(), 'Duplicate')
            self.redis_logger.debug(f'{item.get_id()} has key(s)')
            print(f'{item.get_id()} has key(s)')
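
KeyEnum is defined elsewhere; a plausible sketch (an assumption) of the armor markers this module scans for, using the standard PEM/ASCII-armor header lines:

from enum import Enum

class KeyEnum(Enum):
    PGP_MESSAGE = '-----BEGIN PGP MESSAGE-----'
    PGP_PUBLIC_KEY_BLOCK = '-----BEGIN PGP PUBLIC KEY BLOCK-----'
    PGP_SIGNATURE = '-----BEGIN PGP SIGNATURE-----'
    PGP_PRIVATE_KEY_BLOCK = '-----BEGIN PGP PRIVATE KEY BLOCK-----'
    CERTIFICATE = '-----BEGIN CERTIFICATE-----'
    RSA_PRIVATE_KEY = '-----BEGIN RSA PRIVATE KEY-----'
    PRIVATE_KEY = '-----BEGIN PRIVATE KEY-----'
    ENCRYPTED_PRIVATE_KEY = '-----BEGIN ENCRYPTED PRIVATE KEY-----'
    OPENSSH_PRIVATE_KEY = '-----BEGIN OPENSSH PRIVATE KEY-----'
    SSH2_ENCRYPTED_PRIVATE_KEY = '---- BEGIN SSH2 ENCRYPTED PRIVATE KEY ----'
    OPENVPN_STATIC_KEY_V1 = '-----BEGIN OpenVPN Static key V1-----'
    DSA_PRIVATE_KEY = '-----BEGIN DSA PRIVATE KEY-----'
    EC_PRIVATE_KEY = '-----BEGIN EC PRIVATE KEY-----'
    PUBLIC_KEY = '-----BEGIN PUBLIC KEY-----'

print(KeyEnum.RSA_PRIVATE_KEY.value in '-----BEGIN RSA PRIVATE KEY-----\nMIIE...')
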
Example #17
class Tracker_Yara(AbstractModule):
    """
    Tracker_Yara module for AIL framework
    """

    mail_body_template = "AIL Framework,\nNew YARA match: {}\nitem id: {}\nurl: {}{}"

    def __init__(self):
        super(Tracker_Yara, self).__init__()
        self.pending_seconds = 5

        self.full_item_url = self.process.config.get(
            "Notifications", "ail_domain") + "/object/item?id="

        # Load Yara rules
        self.rules = Tracker.reload_yara_rules()
        self.last_refresh = time.time()

        self.item = None

        self.redis_logger.info(f"Module: {self.module_name} Launched")

    def compute(self, item_id):
        # refresh YARA list
        if self.last_refresh < Tracker.get_tracker_last_updated_by_type(
                'yara'):
            self.rules = Tracker.reload_yara_rules()
            self.last_refresh = time.time()
            self.redis_logger.debug('Tracked YARA rules refreshed')
            print('Tracked YARA rules refreshed')

        self.item = Item(item_id)
        item_content = self.item.get_content()
        try:
            yara_match = self.rules.match(
                data=item_content,
                callback=self.yara_rules_match,
                which_callbacks=yara.CALLBACK_MATCHES,
                timeout=60)
            if yara_match:
                self.redis_logger.info(f'{self.item.get_id()}: {yara_match}')
                print(f'{self.item.get_id()}: {yara_match}')
        except yara.TimeoutError:
            print(f'{self.item.get_id()}: yara scanning timed out')
            self.redis_logger.info(
                f'{self.item.get_id()}: yara scanning timed out')

    def yara_rules_match(self, data):
        tracker_uuid = data['namespace']
        item_id = self.item.get_id()
        item_source = self.item.get_source()
        item_date = self.item.get_date()

        # Source Filtering
        tracker_sources = Tracker.get_tracker_uuid_sources(tracker_uuid)
        if tracker_sources and item_source not in tracker_sources:
            print(f'Source Filtering: {data["rule"]}')
            return yara.CALLBACK_CONTINUE

        Tracker.add_tracked_item(tracker_uuid, item_id)

        # Tags
        tags_to_add = Tracker.get_tracker_tags(tracker_uuid)
        for tag in tags_to_add:
            msg = '{};{}'.format(tag, item_id)
            self.send_message_to_queue(msg, 'Tags')

        # Mails
        mail_to_notify = Tracker.get_tracker_mails(tracker_uuid)
        if mail_to_notify:
            mail_subject = Tracker.get_email_subject(tracker_uuid)
            mail_body = Tracker_Yara.mail_body_template.format(
                data['rule'], item_id, self.full_item_url, item_id)
            for mail in mail_to_notify:
                self.redis_logger.debug(f'Send Mail {mail_subject}')
                print(f'Send Mail {mail_subject}')
                NotificationHelper.sendEmailNotification(mail, mail_subject,
                                                         mail_body)

        # Webhook
        webhook_to_post = Term.get_term_webhook(tracker_uuid)
        if webhook_to_post:
            json_request = {
                "trackerId": tracker_uuid,
                "itemId": item_id,
                "itemURL": self.full_item_url + item_id,
                "dataRule": data["rule"],
                "itemSource": item_source,
                "itemDate": item_date,
                "tags": tags_to_add,
                "emailNotification": f'{mail_to_notify}',
                "trackerType": "yara"
            }
            try:
                response = requests.post(webhook_to_post, json=json_request)
                if response.status_code >= 400:
                    self.redis_logger.error(
                        f"Webhook request failed for {webhook_to_post}\nReason: {response.reason}"
                    )
            except requests.exceptions.RequestException as e:
                self.redis_logger.error(
                    f"Webhook request failed for {webhook_to_post}\nReason: {e}"
                )

        return yara.CALLBACK_CONTINUE
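
The yara-python surface used by Tracker_Yara, standalone; the rule source below is illustrative only:

import yara

rules = yara.compile(sources={
    'tracker-uuid-1234':
        'rule keyword_match { strings: $a = "secret token" condition: $a }',
})

def on_match(data):
    # data carries 'namespace' (the tracker uuid here) and 'rule'
    print(data['namespace'], data['rule'])
    return yara.CALLBACK_CONTINUE

rules.match(data='a leaked secret token appears here',
            callback=on_match,
            which_callbacks=yara.CALLBACK_MATCHES,
            timeout=60)
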
Example #18
    def compute(self, message):

        id, count = message.split()
        item = Item(id)

        item_content = item.get_content()

        # Extract all credentials
        all_credentials = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.regex_cred, item.get_id(), item_content, max_time=self.max_execution_time)

        if all_credentials:
            nb_cred = len(all_credentials)
            message = f'Checked {nb_cred} credentials found.'

            all_sites = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.regex_web, item.get_id(), item_content, max_time=self.max_execution_time)
            if all_sites:
                discovered_sites = ', '.join(all_sites)
                message += f' Related websites: {discovered_sites}'

            print(message)

            to_print = f'Credential;{item.get_source()};{item.get_date()};{item.get_basename()};{message};{item.get_id()}'

            # If the credential count exceeds the alert threshold, publish an alert
            if nb_cred > self.criticalNumberToAlert:
                print(f"========> Found more than {self.criticalNumberToAlert} credentials in this file : {item.get_id()}")
                self.redis_logger.warning(to_print)

                # Send to duplicate
                self.send_message_to_queue(item.get_id(), 'Duplicate')

                msg = f'infoleak:automatic-detection="credential";{item.get_id()}'
                self.send_message_to_queue(msg, 'Tags')

                site_occurence = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.regex_site_for_stats, item.get_id(), item_content, max_time=self.max_execution_time, r_set=False)

                creds_sites = {}

                for site in site_occurence:
                    site_domain = site[1:-1].lower()
                    creds_sites[site_domain] = creds_sites.get(site_domain, 0) + 1

                for url in all_sites:
                    self.faup.decode(url)
                    domain = self.faup.get()['domain']
                    ## TODO: # FIXME: remove me, check faup version
                    try:
                        domain = domain.decode()
                    except AttributeError:
                        pass
                    creds_sites[domain] = creds_sites.get(domain, 0) + 1

                for site, num in creds_sites.items(): # Send for each different site to moduleStats

                    mssg = f'credential;{num};{site};{item.get_date()}'
                    print(mssg)
                    self.send_message_to_queue(mssg, 'ModuleStats')

                if all_sites:
                    discovered_sites = ', '.join(all_sites)
                    print(f"=======> Probably on : {discovered_sites}")

                date = datetime.now().strftime("%Y%m")
                for cred in all_credentials:
                    mail_domains = re.findall(r"@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,20}", cred.lower())
                    if not mail_domains:
                        continue  # credential without a mail domain: nothing to count
                    self.faup.decode(mail_domains[0])
                    tld = self.faup.get()['tld']
                    ## TODO: # FIXME: remove me
                    try:
                        tld = tld.decode()
                    except AttributeError:
                        pass
                    self.server_statistics.hincrby('credential_by_tld:'+date, tld, 1)
            else:
                self.redis_logger.info(to_print)
                print(f'found {nb_cred} credentials')

            # For searching credential in termFreq
            for cred in all_credentials:
                cred = cred.split('@')[0]  # Keep only the username, drop the mail domain

                # unique number attached to unique path
                uniq_num_path = self.server_cred.incr(Credential.REDIS_KEY_NUM_PATH)
                self.server_cred.hmset(Credential.REDIS_KEY_ALL_PATH_SET, {item.get_id(): uniq_num_path})
                self.server_cred.hmset(Credential.REDIS_KEY_ALL_PATH_SET_REV, {uniq_num_path: item.get_id()})

                # unique number attached to unique username
                uniq_num_cred = self.server_cred.hget(Credential.REDIS_KEY_ALL_CRED_SET, cred)
                if uniq_num_cred is None:
                    # cred do not exist, create new entries
                    uniq_num_cred = self.server_cred.incr(Credential.REDIS_KEY_NUM_USERNAME)
                    self.server_cred.hmset(Credential.REDIS_KEY_ALL_CRED_SET, {cred: uniq_num_cred})
                    self.server_cred.hmset(Credential.REDIS_KEY_ALL_CRED_SET_REV, {uniq_num_cred: cred})

                # Add the mapping between the credential and the path
                self.server_cred.sadd(Credential.REDIS_KEY_MAP_CRED_TO_PATH+'_'+str(uniq_num_cred), uniq_num_path)

                # Split credentials on capital letters, numbers, dots and so on
                # Add the split to redis, each split point towards its initial credential unique number
                splitedCred = re.findall(Credential.REGEX_CRED, cred)
                for partCred in splitedCred:
                    if len(partCred) > self.minimumLengthThreshold:
                        self.server_cred.sadd(partCred, uniq_num_cred)
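
The Redis bookkeeping above, shown standalone with redis-py; the key names are illustrative stand-ins for the Credential.REDIS_KEY_* constants:

import redis

r = redis.Redis()

def map_credential_to_item(cred, item_id):
    # One unique integer per item path, with forward and reverse lookups
    num_path = r.incr('credential:num_path')
    r.hset('credential:all_path', item_id, num_path)
    r.hset('credential:all_path_rev', num_path, item_id)

    # One unique integer per username, created on first sight
    num_cred = r.hget('credential:all_cred', cred)
    if num_cred is None:
        num_cred = r.incr('credential:num_username')
        r.hset('credential:all_cred', cred, num_cred)
        r.hset('credential:all_cred_rev', num_cred, cred)

    # Many-to-many: record which paths each credential appeared in
    r.sadd(f'credential:map_cred_to_path_{int(num_cred)}', num_path)
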