def compute(self, message):
    item = Item(message)
    content = item.get_content()

    # List of the regex results in the Item, may be null
    results = self.REG_PHONE.findall(content)

    # If the list is greater than 4, we consider the Item may contain a list of phone numbers
    if len(results) > 4:
        self.redis_logger.debug(results)
        self.redis_logger.warning(f'{item.get_id()} contains PID (phone numbers)')

        msg = f'infoleak:automatic-detection="phone-number";{item.get_id()}'
        self.send_message_to_queue(msg, 'Tags')

        # Send to duplicate
        self.send_message_to_queue(item.get_id(), 'Duplicate')

        stats = {}
        for phone_number in results:
            try:
                x = phonenumbers.parse(phone_number, None)
                country_code = x.country_code
                if stats.get(country_code) is None:
                    stats[country_code] = 1
                else:
                    stats[country_code] = stats[country_code] + 1
            except:
                pass
        for country_code in stats:
            if stats[country_code] > 4:
                self.redis_logger.warning(f'{item.get_id()} contains Phone numbers with country code {country_code}')
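# A minimal standalone sketch of the country-code aggregation used above, based on
# the same `phonenumbers` library. collections.Counter is an assumption for
# illustration; the module itself builds the dict by hand.
import phonenumbers
from collections import Counter

def count_country_codes(numbers):
    """Count occurrences of each country calling code in a list of raw phone numbers."""
    stats = Counter()
    for raw in numbers:
        try:
            # region=None: only numbers in international format (leading '+') are parsed
            parsed = phonenumbers.parse(raw, None)
            stats[parsed.country_code] += 1
        except phonenumbers.NumberParseException:
            pass  # ignore strings that only look like phone numbers
    return stats

# Example: count_country_codes(['+33 6 12 34 56 78', '+1 202 555 0172']) -> Counter({33: 1, 1: 1})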
def compute(self, message, r_result=False):
    id, score = message.split()
    item = Item(id)
    content = item.get_content()
    all_cards = re.findall(self.regex, content)

    if len(all_cards) > 0:
        # self.redis_logger.debug(f'All matching {all_cards}')
        creditcard_set = set()

        for card in all_cards:
            clean_card = re.sub('[^0-9]', '', card)
            if lib_refine.is_luhn_valid(clean_card):
                self.redis_logger.debug(f'{clean_card} is valid')
                creditcard_set.add(clean_card)

        # pprint.pprint(creditcard_set)
        to_print = f'CreditCard;{item.get_source()};{item.get_date()};{item.get_basename()};'

        if len(creditcard_set) > 0:
            self.redis_logger.warning(f'{to_print}Checked {len(creditcard_set)} valid number(s);{item.get_id()}')

            # Send to duplicate
            self.send_message_to_queue(item.get_id(), 'Duplicate')

            msg = f'infoleak:automatic-detection="credit-card";{item.get_id()}'
            self.send_message_to_queue(msg, 'Tags')

            if r_result:
                return creditcard_set
        else:
            self.redis_logger.info(f'{to_print}CreditCard related;{item.get_id()}')
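# A minimal sketch of the Luhn checksum that lib_refine.is_luhn_valid() presumably
# wraps (the helper itself lives in the project's lib_refine package; this
# standalone version is only for illustration).
def is_luhn_valid(card_number: str) -> bool:
    """Return True if the digit string passes the Luhn checksum."""
    checksum = 0
    # Walk the digits right to left, doubling every second one
    for i, char in enumerate(reversed(card_number)):
        d = int(char)
        if i % 2 == 1:
            d *= 2
            if d > 9:
                d -= 9  # same as summing the two digits of the doubled value
        checksum += d
    return checksum % 10 == 0

# Example: is_luhn_valid('4532015112830366') -> True, is_luhn_valid('1234567890123456') -> False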
def compute(self, message):
    url, id = message.split()

    if self.is_sql_injection(url):
        self.faup.decode(url)
        url_parsed = self.faup.get()

        item = Item(id)
        item_id = item.get_id()
        print(f"Detected SQL in URL: {item_id}")
        print(urllib.request.unquote(url))
        to_print = f'SQLInjection;{item.get_source()};{item.get_date()};{item.get_basename()};Detected SQL in URL;{item_id}'
        self.redis_logger.warning(to_print)

        # Send to duplicate
        self.send_message_to_queue(item_id, 'Duplicate')

        # Tag
        msg = f'infoleak:automatic-detection="sql-injection";{item_id}'
        self.send_message_to_queue(msg, 'Tags')

        # statistics
        tld = url_parsed['tld']
        if tld is not None:
            ## TODO: # FIXME: remove me
            try:
                tld = tld.decode()
            except:
                pass
            date = datetime.now().strftime("%Y%m")
            self.server_statistics.hincrby(f'SQLInjection_by_tld:{date}', tld, 1)
def compute(self, message, r_result=False):
    # Create Item Object
    item = Item(message)
    # Get item content
    content = item.get_content()
    categ_found = []

    # Search for pattern categories in item content
    for categ, pattern in self.categ_words:
        found = set(re.findall(pattern, content))
        lenfound = len(found)
        if lenfound >= self.matchingThreshold:
            categ_found.append(categ)
            msg = f'{item.get_id()} {lenfound}'

            # Export message to categ queue
            print(msg, categ)
            self.send_message_to_queue(msg, categ)

            self.redis_logger.info(f'Categ;{item.get_source()};{item.get_date()};{item.get_basename()};Detected {lenfound} as {categ};{item.get_id()}')

    if r_result:
        return categ_found
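# Hedged sketch of how self.categ_words could be populated: one keyword file per
# category, each compiled into a single word-boundary regex. The directory layout
# and helper name are assumptions for illustration, not the module's actual API.
import re
from pathlib import Path

def load_categ_words(categ_dir):
    """Return a list of (category, pattern) tuples built from keyword files."""
    categ_words = []
    for path in Path(categ_dir).glob('*'):
        if not path.is_file():
            continue
        words = [re.escape(w.strip()) for w in path.read_text().splitlines() if w.strip()]
        if words:
            # \b anchors keep matches on whole words only
            pattern = re.compile(r'\b(?:' + '|'.join(words) + r')\b', re.IGNORECASE)
            categ_words.append((path.name, pattern))
    return categ_words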
def compute(self, message): """ Search for Web links from given message """ # Extract item id, score = message.split() item = Item(id) item_content = item.get_content() l_urls = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.url_regex, item.get_id(), item_content) for url in l_urls: self.faup.decode(url) unpack_url = self.faup.get() to_send = f"{url} {item.get_id()}" print(to_send) self.send_message_to_queue(to_send, 'Url') self.redis_logger.debug(f"url_parsed: {to_send}") if len(l_urls) > 0: to_print = f'Urls;{item.get_source()};{item.get_date()};{item.get_basename()};' self.redis_logger.info( f'{to_print}Detected {len(l_urls)} URL;{item.get_id()}')
def compute(self, message):
    url, id = message.split()

    self.faup.decode(url)
    url_parsed = self.faup.get()
    ## TODO: # FIXME: remove me
    try:
        resource_path = url_parsed['resource_path'].encode()
    except:
        resource_path = url_parsed['resource_path']

    ## TODO: # FIXME: remove me
    try:
        query_string = url_parsed['query_string'].encode()
    except:
        query_string = url_parsed['query_string']

    result_path = {'sqli': False}
    result_query = {'sqli': False}

    if resource_path is not None:
        result_path = pylibinjection.detect_sqli(resource_path)
        # print(f'path is sqli : {result_path}')

    if query_string is not None:
        result_query = pylibinjection.detect_sqli(query_string)
        # print(f'query is sqli : {result_query}')

    if result_path['sqli'] is True or result_query['sqli'] is True:
        item = Item(id)
        item_id = item.get_id()
        print(f"Detected (libinjection) SQL in URL: {item_id}")
        print(urllib.request.unquote(url))

        to_print = f'LibInjection;{item.get_source()};{item.get_date()};{item.get_basename()};Detected SQL in URL;{item_id}'
        self.redis_logger.warning(to_print)

        # Send to duplicate
        self.send_message_to_queue(item_id, 'Duplicate')

        # Add tag
        msg = f'infoleak:automatic-detection="sql-injection";{item_id}'
        self.send_message_to_queue(msg, 'Tags')

        # statistics
        ## TODO: # FIXME: remove me
        try:
            tld = url_parsed['tld'].decode()
        except:
            tld = url_parsed['tld']
        if tld is not None:
            date = datetime.now().strftime("%Y%m")
            self.server_statistics.hincrby(f'SQLInjection_by_tld:{date}', tld, 1)
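# Minimal usage sketch for pylibinjection as relied on above: detect_sqli() takes a
# bytes payload and returns a dict whose 'sqli' entry is True when libinjection
# flags the input (other fields of the result are not used here).
import pylibinjection

payload = b"' OR 1=1 --"
result = pylibinjection.detect_sqli(payload)
if result['sqli']:
    print('libinjection flagged this payload as SQL injection')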
def compute(self, message):
    # list of tuples: (url, subdomains, domain)
    urls_to_crawl = []

    id, score = message.split()
    item = Item(id)
    item_content = item.get_content()

    # max execution time on regex
    res = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.url_regex, item.get_id(), item_content)
    for x in res:
        # String to tuple
        x = x[2:-2].replace(" '", "").split("',")
        url = x[0]
        subdomain = x[4].lower()
        self.faup.decode(url)
        url_unpack = self.faup.get()
        try:  ## TODO: # FIXME: check faup version
            domain = url_unpack['domain'].decode().lower()
        except Exception as e:
            domain = url_unpack['domain'].lower()

        if crawlers.is_valid_onion_domain(domain):
            urls_to_crawl.append((url, subdomain, domain))

    to_print = f'Onion;{item.get_source()};{item.get_date()};{item.get_basename()};'
    if not urls_to_crawl:
        self.redis_logger.info(f'{to_print}Onion related;{item.get_id()}')
        return

    # TAG Item
    msg = f'infoleak:automatic-detection="onion";{item.get_id()}'
    self.send_message_to_queue(msg, 'Tags')

    if crawlers.is_crawler_activated():
        for to_crawl in urls_to_crawl:
            print(f'{to_crawl[2]} added to crawler queue: {to_crawl[0]}')
            crawlers.add_item_to_discovery_queue('onion', to_crawl[2], to_crawl[1], to_crawl[0], item.get_id())
    else:
        print(f'{to_print}Detected {len(urls_to_crawl)} .onion(s);{item.get_id()}')
        self.redis_logger.warning(f'{to_print}Detected {len(urls_to_crawl)} .onion(s);{item.get_id()}')
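# Hedged sketch of an onion-domain validity check in the spirit of
# crawlers.is_valid_onion_domain() (the real helper lives in the crawlers package;
# this standalone regex is an assumption covering v2 and v3 addresses):
import re

# v2 addresses are 16 base32 characters, v3 addresses are 56
ONION_DOMAIN_RE = re.compile(r'^[a-z2-7]{16}(?:[a-z2-7]{40})?\.onion$')

def is_valid_onion_domain(domain: str) -> bool:
    return bool(ONION_DOMAIN_RE.match(domain))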
def compute(self, message, r_result=False):
    id, score = message.split()
    item = Item(id)
    item_content = item.get_content()

    google_api_key = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.re_google_api_key, item.get_id(), item_content)

    aws_access_key = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.re_aws_access_key, item.get_id(), item_content)
    # Only search for a secret key if an access key was found
    aws_secret_key = []
    if aws_access_key:
        aws_secret_key = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.re_aws_secret_key, item.get_id(), item_content)

    if aws_access_key or google_api_key:
        to_print = f'ApiKey;{item.get_source()};{item.get_date()};{item.get_basename()};'

        if google_api_key:
            print(f'found google api key: {to_print}')
            self.redis_logger.warning(f'{to_print}Checked {len(google_api_key)} found Google API Key;{item.get_id()}')

            msg = f'infoleak:automatic-detection="google-api-key";{item.get_id()}'
            self.send_message_to_queue(msg, 'Tags')

        # # TODO: # FIXME: AWS regex/validate/sanityze KEY + SECRET KEY
        if aws_access_key:
            print(f'found AWS key: {to_print}')
            self.redis_logger.warning(f'{to_print}Checked {len(aws_access_key)} found AWS Key;{item.get_id()}')
            if aws_secret_key:
                print('found AWS secret key')
                self.redis_logger.warning(f'{to_print}Checked {len(aws_secret_key)} found AWS secret Key;{item.get_id()}')

            msg = f'infoleak:automatic-detection="aws-key";{item.get_id()}'
            self.send_message_to_queue(msg, 'Tags')

        # Tags
        msg = f'infoleak:automatic-detection="api-key";{item.get_id()}'
        self.send_message_to_queue(msg, 'Tags')

        # Send to duplicate
        self.send_message_to_queue(item.get_id(), 'Duplicate')

        if r_result:
            return (google_api_key, aws_access_key, aws_secret_key)
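# Hedged illustration of the kind of patterns self.re_google_api_key and
# self.re_aws_access_key may hold. The module's real regexes are configured
# elsewhere; the ones below are commonly published key-ID formats, shown only as
# an example.
import re

re_google_api_key = re.compile(r'AIza[0-9A-Za-z\-_]{35}')  # Google API keys start with "AIza"
re_aws_access_key = re.compile(r'AKIA[0-9A-Z]{16}')        # AWS access key IDs start with "AKIA"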
def compute(self, item_id):
    # refresh Tracked regex
    if self.last_refresh < Tracker.get_tracker_last_updated_by_type('regex'):
        self.dict_regex_tracked = Term.get_regex_tracked_words_dict()
        self.last_refresh = time.time()
        self.redis_logger.debug('Tracked regex refreshed')
        print('Tracked regex refreshed')

    item = Item(item_id)
    item_id = item.get_id()
    item_content = item.get_content()

    for regex in self.dict_regex_tracked:
        matched = regex_helper.regex_search(self.module_name, self.redis_cache_key, self.dict_regex_tracked[regex], item_id, item_content, max_time=self.max_execution_time)
        if matched:
            self.new_tracker_found(regex, 'regex', item)
def compute(self, message):
    # Extract item ID and tag from message
    mess_split = message.split(';')
    if len(mess_split) == 2:
        tag = mess_split[0]
        item = Item(mess_split[1])
        item_id = item.get_id()

        # Create a new tag
        Tag.add_tag('item', tag, item.get_id())
        print(f'{item_id}: Tagged {tag}')

        # Forward message to channel
        self.send_message_to_queue(message, 'MISP_The_Hive_feeder')
    else:
        # Malformed message
        raise Exception(f'Malformed message: expected 2 values (tag;item_id), got {len(mess_split)} with message {message}')
def compute(self, item_id):
    # refresh Tracked term
    if self.last_refresh_word < Term.get_tracked_term_last_updated_by_type('word'):
        self.list_tracked_words = Term.get_tracked_words_list()
        self.last_refresh_word = time.time()
        self.redis_logger.debug('Tracked word refreshed')
        print('Tracked word refreshed')

    if self.last_refresh_set < Term.get_tracked_term_last_updated_by_type('set'):
        self.set_tracked_words_list = Term.get_set_tracked_words_list()
        self.last_refresh_set = time.time()
        self.redis_logger.debug('Tracked set refreshed')
        print('Tracked set refreshed')

    # Cast message as Item
    item = Item(item_id)
    item_date = item.get_date()
    item_content = item.get_content()

    signal.alarm(self.max_execution_time)

    dict_words_freq = None
    try:
        dict_words_freq = Term.get_text_word_frequency(item_content)
    except TimeoutException:
        self.redis_logger.warning(f"{item.get_id()} processing timeout")
    else:
        signal.alarm(0)

    if dict_words_freq:
        # create token statistics
        # for word in dict_words_freq:
        #     Term.create_token_statistics(item_date, word, dict_words_freq[word])

        item_source = item.get_source()

        # check solo words
        ####### # TODO: check if source needed #######
        for word in self.list_tracked_words:
            if word in dict_words_freq:
                self.new_term_found(word, 'word', item.get_id(), item_source)

        # check words set
        for elem in self.set_tracked_words_list:
            list_words = elem[0]
            nb_words_threshold = elem[1]
            word_set = elem[2]
            nb_uniq_word = 0

            for word in list_words:
                if word in dict_words_freq:
                    nb_uniq_word += 1
            if nb_uniq_word >= nb_words_threshold:
                self.new_term_found(word_set, 'set', item.get_id(), item_source)
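# The signal.alarm() / TimeoutException pair above relies on a SIGALRM handler
# installed once at module level. A minimal sketch of that pattern (the handler
# and exception names are assumptions matching the usage in compute()):
import signal

class TimeoutException(Exception):
    pass

def timeout_handler(signum, frame):
    raise TimeoutException

signal.signal(signal.SIGALRM, timeout_handler)

# signal.alarm(n) arms an n-second timer; if the word-frequency computation is
# still running when it fires, the handler raises TimeoutException, and
# signal.alarm(0) disarms the timer when the work finishes in time.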
def compute(self, message):
    docpath = message.split(" ", -1)[-1]

    item = Item(message)
    item_id = item.get_id()
    item_content = item.get_content()

    self.redis_logger.debug(f"Indexing - {self.indexname}: {docpath}")
    print(f"Indexing - {self.indexname}: {docpath}")

    try:
        # Avoid calculating the index's size at each message
        if time.time() - self.last_refresh > self.TIME_WAIT:
            self.last_refresh = time.time()
            if self.check_index_size() >= self.INDEX_SIZE_THRESHOLD * (1000 * 1000):
                timestamp = int(time.time())
                self.redis_logger.debug(f"Creating new index {timestamp}")
                print(f"Creating new index {timestamp}")
                self.indexpath = join(self.baseindexpath, str(timestamp))
                self.indexname = str(timestamp)
                # update all_index
                with open(self.indexRegister_path, "a") as f:
                    f.write('\n' + str(timestamp))
                # create new dir
                os.mkdir(self.indexpath)
                self.ix = create_in(self.indexpath, self.schema)

        if self.indexertype == "whoosh":
            indexwriter = self.ix.writer()
            indexwriter.update_document(title=docpath, path=docpath, content=item_content)
            indexwriter.commit()

    except IOError:
        self.redis_logger.debug(f"CRC Checksum Failed on: {item_id}")
        print(f"CRC Checksum Failed on: {item_id}")
        self.redis_logger.error(f'Duplicate;{item.get_source()};{item.get_date()};{item.get_basename()};CRC Checksum Failed')
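# A hedged sketch of the Whoosh schema this module plausibly builds at start-up.
# Field names are taken from the update_document() call above; the exact field
# types are an assumption.
from whoosh.fields import Schema, TEXT, ID
from whoosh.index import create_in

schema = Schema(title=TEXT(stored=True),
                path=ID(stored=True, unique=True),
                content=TEXT)

# create_in(dirname, schema) creates a fresh index inside an existing directory,
# which is why compute() calls os.mkdir(self.indexpath) before create_in().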
def new_term_found(self, term, term_type, item_id, item_source):
    uuid_list = Term.get_term_uuid_list(term, term_type)

    self.redis_logger.info(f'new tracked term found: {term} in {item_id}')
    print(f'new tracked term found: {term} in {item_id}')

    item_date = Item(item_id).get_date()

    for term_uuid in uuid_list:
        tracker_sources = Tracker.get_tracker_uuid_sources(term_uuid)
        if not tracker_sources or item_source in tracker_sources:
            Tracker.add_tracked_item(term_uuid, item_id)

            tags_to_add = Term.get_term_tags(term_uuid)
            for tag in tags_to_add:
                msg = f'{tag};{item_id}'
                self.send_message_to_queue(msg, 'Tags')

            mail_to_notify = Term.get_term_mails(term_uuid)
            if mail_to_notify:
                mail_subject = Tracker.get_email_subject(term_uuid)
                mail_body = Tracker_Term.mail_body_template.format(term, item_id, self.full_item_url, item_id)
                for mail in mail_to_notify:
                    self.redis_logger.debug(f'Send Mail {mail_subject}')
                    print(f'Send Mail {mail_subject}')
                    NotificationHelper.sendEmailNotification(mail, mail_subject, mail_body)

            # Webhook
            webhook_to_post = Term.get_term_webhook(term_uuid)
            if webhook_to_post:
                json_request = {
                    "trackerId": term_uuid,
                    "itemId": item_id,
                    "itemURL": self.full_item_url + item_id,
                    "term": term,
                    "itemSource": item_source,
                    "itemDate": item_date,
                    "tags": tags_to_add,
                    "emailNotification": f'{mail_to_notify}',
                    "trackerType": term_type
                }
                try:
                    response = requests.post(webhook_to_post, json=json_request)
                    if response.status_code >= 400:
                        self.redis_logger.error(f"Webhook request failed for {webhook_to_post}\nReason: {response.reason}")
                except:
                    self.redis_logger.error(f"Webhook request failed for {webhook_to_post}\nReason: Something went wrong")
def compute(self, message, r_result=False):
    item = Item(message)

    item_content = item.get_content()
    item_basename = item.get_basename()
    item_date = item.get_date()
    item_source = item.get_source()
    try:
        mimetype = item_basic.get_item_mimetype(item.get_id())

        if mimetype.split('/')[0] == "text":
            self.c.text(rawtext=item_content)
            self.c.potentialdomain()
            self.c.validdomain(passive_dns=True, extended=False)
            # self.redis_logger.debug(self.c.vdomain)

            if self.c.vdomain and d4.is_passive_dns_enabled():
                for dns_record in self.c.vdomain:
                    self.send_message_to_queue(dns_record)

            localizeddomains = self.c.include(expression=self.cc_tld)
            if localizeddomains:
                print(localizeddomains)
                self.redis_logger.warning(f"DomainC;{item_source};{item_date};{item_basename};Checked {localizeddomains} located in {self.cc_tld};{item.get_id()}")

            localizeddomains = self.c.localizedomain(cc=self.cc)
            if localizeddomains:
                print(localizeddomains)
                self.redis_logger.warning(f"DomainC;{item_source};{item_date};{item_basename};Checked {localizeddomains} located in {self.cc};{item.get_id()}")

            if r_result:
                return self.c.vdomain

    except IOError as err:
        self.redis_logger.error(f"Duplicate;{item_source};{item_date};{item_basename};CRC Checksum Failed")
        raise Exception(f"CRC Checksum Failed on: {item.get_id()}")
def compute(self, message):
    item = Item(message)
    content = item.get_content()

    find = False
    get_pgp_content = False

    if KeyEnum.PGP_MESSAGE.value in content:
        self.redis_logger.warning(f'{item.get_basename()} has a PGP enc message')

        msg = f'infoleak:automatic-detection="pgp-message";{item.get_id()}'
        self.send_message_to_queue(msg, 'Tags')
        get_pgp_content = True
        find = True

    if KeyEnum.PGP_PUBLIC_KEY_BLOCK.value in content:
        msg = f'infoleak:automatic-detection="pgp-public-key-block";{item.get_id()}'
        self.send_message_to_queue(msg, 'Tags')
        get_pgp_content = True

    if KeyEnum.PGP_SIGNATURE.value in content:
        msg = f'infoleak:automatic-detection="pgp-signature";{item.get_id()}'
        self.send_message_to_queue(msg, 'Tags')
        get_pgp_content = True

    if KeyEnum.CERTIFICATE.value in content:
        self.redis_logger.warning(f'{item.get_basename()} has a certificate message')

        msg = f'infoleak:automatic-detection="certificate";{item.get_id()}'
        self.send_message_to_queue(msg, 'Tags')
        find = True

    if KeyEnum.RSA_PRIVATE_KEY.value in content:
        self.redis_logger.warning(f'{item.get_basename()} has a RSA private key message')
        print('rsa private key message found')

        msg = f'infoleak:automatic-detection="rsa-private-key";{item.get_id()}'
        self.send_message_to_queue(msg, 'Tags')
        find = True

    if KeyEnum.PRIVATE_KEY.value in content:
        self.redis_logger.warning(f'{item.get_basename()} has a private key message')
        print('private key message found')

        msg = f'infoleak:automatic-detection="private-key";{item.get_id()}'
        self.send_message_to_queue(msg, 'Tags')
        find = True

    if KeyEnum.ENCRYPTED_PRIVATE_KEY.value in content:
        self.redis_logger.warning(f'{item.get_basename()} has an encrypted private key message')
        print('encrypted private key message found')

        msg = f'infoleak:automatic-detection="encrypted-private-key";{item.get_id()}'
        self.send_message_to_queue(msg, 'Tags')
        find = True

    if KeyEnum.OPENSSH_PRIVATE_KEY.value in content:
        self.redis_logger.warning(f'{item.get_basename()} has an openssh private key message')
        print('openssh private key message found')

        msg = f'infoleak:automatic-detection="private-ssh-key";{item.get_id()}'
        self.send_message_to_queue(msg, 'Tags')
        find = True

    if KeyEnum.SSH2_ENCRYPTED_PRIVATE_KEY.value in content:
        self.redis_logger.warning(f'{item.get_basename()} has an ssh2 private key message')
        print('SSH2 private key message found')

        msg = f'infoleak:automatic-detection="private-ssh-key";{item.get_id()}'
        self.send_message_to_queue(msg, 'Tags')
        find = True

    if KeyEnum.OPENVPN_STATIC_KEY_V1.value in content:
        self.redis_logger.warning(f'{item.get_basename()} has an OpenVPN static key message')
        print('OpenVPN static key message found')

        msg = f'infoleak:automatic-detection="vpn-static-key";{item.get_id()}'
        self.send_message_to_queue(msg, 'Tags')
        find = True

    if KeyEnum.DSA_PRIVATE_KEY.value in content:
        self.redis_logger.warning(f'{item.get_basename()} has a dsa private key message')

        msg = f'infoleak:automatic-detection="dsa-private-key";{item.get_id()}'
        self.send_message_to_queue(msg, 'Tags')
        find = True

    if KeyEnum.EC_PRIVATE_KEY.value in content:
        self.redis_logger.warning(f'{item.get_basename()} has an ec private key message')

        msg = f'infoleak:automatic-detection="ec-private-key";{item.get_id()}'
        self.send_message_to_queue(msg, 'Tags')
        find = True

    if KeyEnum.PGP_PRIVATE_KEY_BLOCK.value in content:
        self.redis_logger.warning(f'{item.get_basename()} has a pgp private key block message')

        msg = f'infoleak:automatic-detection="pgp-private-key";{item.get_id()}'
        self.send_message_to_queue(msg, 'Tags')
        find = True
    if KeyEnum.PUBLIC_KEY.value in content:
        self.redis_logger.warning(f'{item.get_basename()} has a public key message')

        msg = f'infoleak:automatic-detection="public-key";{item.get_id()}'
        self.send_message_to_queue(msg, 'Tags')
        find = True

    # pgp content
    if get_pgp_content:
        self.send_message_to_queue(item.get_id(), 'PgpDump')

    if find:
        # Send to duplicate
        self.send_message_to_queue(item.get_id(), 'Duplicate')
        self.redis_logger.debug(f'{item.get_id()} has key(s)')
        print(f'{item.get_id()} has key(s)')
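# Hedged sketch of what the KeyEnum markers above most likely contain: standard
# PEM / ASCII-armor headers matched with plain substring checks. Only a subset is
# shown; the real enum is defined elsewhere in the module.
from enum import Enum

class KeyEnum(Enum):
    PGP_MESSAGE = '-----BEGIN PGP MESSAGE-----'
    PGP_PUBLIC_KEY_BLOCK = '-----BEGIN PGP PUBLIC KEY BLOCK-----'
    CERTIFICATE = '-----BEGIN CERTIFICATE-----'
    RSA_PRIVATE_KEY = '-----BEGIN RSA PRIVATE KEY-----'
    OPENSSH_PRIVATE_KEY = '-----BEGIN OPENSSH PRIVATE KEY-----'
    OPENVPN_STATIC_KEY_V1 = '-----BEGIN OpenVPN Static key V1-----'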
class Tracker_Yara(AbstractModule):
    """
    Tracker_Yara module for AIL framework
    """

    mail_body_template = "AIL Framework,\nNew YARA match: {}\nitem id: {}\nurl: {}{}"

    def __init__(self):
        super(Tracker_Yara, self).__init__()
        self.pending_seconds = 5

        self.full_item_url = self.process.config.get("Notifications", "ail_domain") + "/object/item?id="

        # Load Yara rules
        self.rules = Tracker.reload_yara_rules()
        self.last_refresh = time.time()

        self.item = None

        self.redis_logger.info(f"Module: {self.module_name} Launched")

    def compute(self, item_id):
        # refresh YARA list
        if self.last_refresh < Tracker.get_tracker_last_updated_by_type('yara'):
            self.rules = Tracker.reload_yara_rules()
            self.last_refresh = time.time()
            self.redis_logger.debug('Tracked set refreshed')
            print('Tracked set refreshed')

        self.item = Item(item_id)
        item_content = self.item.get_content()
        try:
            yara_match = self.rules.match(data=item_content,
                                          callback=self.yara_rules_match,
                                          which_callbacks=yara.CALLBACK_MATCHES,
                                          timeout=60)
            if yara_match:
                self.redis_logger.info(f'{self.item.get_id()}: {yara_match}')
                print(f'{self.item.get_id()}: {yara_match}')
        except yara.TimeoutError as e:
            print(f'{self.item.get_id()}: yara scanning timed out')
            self.redis_logger.info(f'{self.item.get_id()}: yara scanning timed out')

    def yara_rules_match(self, data):
        tracker_uuid = data['namespace']
        item_id = self.item.get_id()
        item_source = self.item.get_source()
        item_date = self.item.get_date()

        # Source Filtering
        tracker_sources = Tracker.get_tracker_uuid_sources(tracker_uuid)
        if tracker_sources and item_source not in tracker_sources:
            print(f'Source Filtering: {data["rule"]}')
            return yara.CALLBACK_CONTINUE

        Tracker.add_tracked_item(tracker_uuid, item_id)

        # Tags
        tags_to_add = Tracker.get_tracker_tags(tracker_uuid)
        for tag in tags_to_add:
            msg = f'{tag};{item_id}'
            self.send_message_to_queue(msg, 'Tags')

        # Mails
        mail_to_notify = Tracker.get_tracker_mails(tracker_uuid)
        if mail_to_notify:
            mail_subject = Tracker.get_email_subject(tracker_uuid)
            mail_body = Tracker_Yara.mail_body_template.format(data['rule'], item_id, self.full_item_url, item_id)
            for mail in mail_to_notify:
                self.redis_logger.debug(f'Send Mail {mail_subject}')
                print(f'Send Mail {mail_subject}')
                NotificationHelper.sendEmailNotification(mail, mail_subject, mail_body)

        # Webhook
        webhook_to_post = Term.get_term_webhook(tracker_uuid)
        if webhook_to_post:
            json_request = {
                "trackerId": tracker_uuid,
                "itemId": item_id,
                "itemURL": self.full_item_url + item_id,
                "dataRule": data["rule"],
                "itemSource": item_source,
                "itemDate": item_date,
                "tags": tags_to_add,
                "emailNotification": f'{mail_to_notify}',
                "trackerType": "yara"
            }
            try:
                response = requests.post(webhook_to_post, json=json_request)
                if response.status_code >= 400:
                    self.redis_logger.error(f"Webhook request failed for {webhook_to_post}\nReason: {response.reason}")
            except:
                self.redis_logger.error(f"Webhook request failed for {webhook_to_post}\nReason: Something went wrong")

        return yara.CALLBACK_CONTINUE
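# Hedged sketch of what Tracker.reload_yara_rules() plausibly does: compile every
# tracked rule file under its tracker uuid as the namespace, so the match callback
# above can recover the tracker via data['namespace']. The helper name and file
# layout are assumptions for illustration.
import yara

def reload_yara_rules(rule_files_by_uuid):
    """rule_files_by_uuid: dict mapping tracker uuid -> path to its .yar file."""
    # Each dict key passed to filepaths= becomes the namespace reported for
    # matches of the corresponding rule file.
    return yara.compile(filepaths=rule_files_by_uuid)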
def compute(self, message):
    id, count = message.split()
    item = Item(id)

    item_content = item.get_content()

    # Extract all credentials
    all_credentials = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.regex_cred, item.get_id(), item_content, max_time=self.max_execution_time)

    if all_credentials:
        nb_cred = len(all_credentials)
        message = f'Checked {nb_cred} credentials found.'

        all_sites = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.regex_web, item.get_id(), item_content, max_time=self.max_execution_time)
        if all_sites:
            discovered_sites = ', '.join(all_sites)
            message += f' Related websites: {discovered_sites}'

        print(message)

        to_print = f'Credential;{item.get_source()};{item.get_date()};{item.get_basename()};{message};{item.get_id()}'

        # num of creds above threshold, publish an alert
        if nb_cred > self.criticalNumberToAlert:
            print(f"========> Found more than {self.criticalNumberToAlert} credentials in this file : {item.get_id()}")
            self.redis_logger.warning(to_print)

            # Send to duplicate
            self.send_message_to_queue(item.get_id(), 'Duplicate')

            msg = f'infoleak:automatic-detection="credential";{item.get_id()}'
            self.send_message_to_queue(msg, 'Tags')

            site_occurence = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.regex_site_for_stats, item.get_id(), item_content, max_time=self.max_execution_time, r_set=False)

            creds_sites = {}

            for site in site_occurence:
                site_domain = site[1:-1].lower()
                if site_domain in creds_sites.keys():
                    creds_sites[site_domain] += 1
                else:
                    creds_sites[site_domain] = 1

            for url in all_sites:
                self.faup.decode(url)
                domain = self.faup.get()['domain']
                ## TODO: # FIXME: remove me, check faup version
                try:
                    domain = domain.decode()
                except:
                    pass
                if domain in creds_sites.keys():
                    creds_sites[domain] += 1
                else:
                    creds_sites[domain] = 1

            for site, num in creds_sites.items():
                # Send for each different site to moduleStats
                mssg = f'credential;{num};{site};{item.get_date()}'
                print(mssg)
                self.send_message_to_queue(mssg, 'ModuleStats')

            if all_sites:
                discovered_sites = ', '.join(all_sites)
                print(f"=======> Probably on : {discovered_sites}")

            date = datetime.now().strftime("%Y%m")
            for cred in all_credentials:
                maildomains = re.findall(r"@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,20}", cred.lower())[0]
                self.faup.decode(maildomains)
                tld = self.faup.get()['tld']
                ## TODO: # FIXME: remove me
                try:
                    tld = tld.decode()
                except:
                    pass
                self.server_statistics.hincrby('credential_by_tld:' + date, tld, 1)
        else:
            self.redis_logger.info(to_print)
            print(f'found {nb_cred} credentials')

        # For searching credential in termFreq
        for cred in all_credentials:
            cred = cred.split('@')[0]  # Split to ignore mail address

            # unique number attached to unique path
            uniq_num_path = self.server_cred.incr(Credential.REDIS_KEY_NUM_PATH)
            self.server_cred.hmset(Credential.REDIS_KEY_ALL_PATH_SET, {item.get_id(): uniq_num_path})
            self.server_cred.hmset(Credential.REDIS_KEY_ALL_PATH_SET_REV, {uniq_num_path: item.get_id()})

            # unique number attached to unique username
            uniq_num_cred = self.server_cred.hget(Credential.REDIS_KEY_ALL_CRED_SET, cred)
            if uniq_num_cred is None:
                # cred do not exist, create new entries
                uniq_num_cred = self.server_cred.incr(Credential.REDIS_KEY_NUM_USERNAME)
                self.server_cred.hmset(Credential.REDIS_KEY_ALL_CRED_SET, {cred: uniq_num_cred})
                self.server_cred.hmset(Credential.REDIS_KEY_ALL_CRED_SET_REV, {uniq_num_cred: cred})

            # Add the mapping between the credential and the path
            self.server_cred.sadd(Credential.REDIS_KEY_MAP_CRED_TO_PATH + '_' + str(uniq_num_cred), uniq_num_path)
            # Split credentials on capital letters, numbers, dots and so on
            # Add the split to redis, each split point towards its initial credential unique number
            splitedCred = re.findall(Credential.REGEX_CRED, cred)
            for partCred in splitedCred:
                if len(partCred) > self.minimumLengthThreshold:
                    self.server_cred.sadd(partCred, uniq_num_cred)
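# Hedged illustration of the split step above: a pattern along the lines of
# Credential.REGEX_CRED (the real value lives in the Credential helper; the one
# below is an assumption) cuts a credential into lowercase, uppercase and digit
# runs so that each fragment can be mapped back to the full credential.
import re

REGEX_CRED = "[a-z]+|[A-Z]+|[0-9]+"
print(re.findall(REGEX_CRED, "JohnSmith2024"))  # ['J', 'ohn', 'S', 'mith', '2024']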