def _search_phishing(self, greedy_data): with_urls = False # If mail is filtered don't check for phishing is_filtered = greedy_data["tokenizer"][2] if is_filtered: return False, False, False # Reset phishing bitmap self._pb.reset_score() # Outputs targets = set() # Get all data mail = greedy_data["tokenizer"][1] body = mail.get('body') subject = mail.get('subject') from_ = mail.get('from') urls_body = greedy_data["urls-handler-body"][2] urls_attachments = greedy_data["urls-handler-attachments"][2] # TODO: if an attachment is filtered, the score is not complete # more different mails can have the same attachment # more different attachments can have the same mail attachments = MailAttachments(greedy_data["attachments"][2]) urls = ( (urls_body, 'urls_body'), (urls_attachments, 'urls_attachments')) # Mapping for targets checks mapping_targets = ( (body, 'mail_body'), (from_, 'mail_from'), (attachments.payloadstext(), 'text_attachments'), (attachments.filenamestext(), 'filename_attachments')) for k, v in mapping_targets: if k: matcher = partial(search_words_given_key, k) t = set(i for i in map(matcher, self._t_keys.iteritems()) if i) if t: targets |= t self._pb.set_property_score(v) # Check urls # Target not added because urls come already analyzed text for k, v in urls: if k: with_urls = True if any(check_urls(k, i) for i in self._t_keys.values()): self._pb.set_property_score(v) # Check subject if swt(subject, self._s_keys): self._pb.set_property_score("mail_subject") return self._pb.score, list(targets), with_urls
class Attachments(AbstractBolt): outputs = ['sha256_random', 'with_attachments', 'attachments'] def initialize(self, stormconf, context): super(Attachments, self).initialize(stormconf, context) self.attach = MailAttachments() self._load_settings() def _load_settings(self): # Loading configuration self._load_lists() settings = copy.deepcopy(self.conf) settings.update({ "filter_cont_types": self._filter_cont_types, "tika_whitelist_cont_types": self._tika_whitelist_cont_types }) self.attach.reload(**settings) def _load_lists(self): # Load content types to filter self._filter_cont_types = load_keywords_list( self.conf["content_types_blacklist"], lower=False) self.log("Content types to filter reloaded") # Load Tika content types to analyze self._tika_whitelist_cont_types = set() if self.conf["tika"]["enabled"]: self._tika_whitelist_cont_types = load_keywords_list( self.conf["tika"]["valid_content_types"], lower=False) self.log("Whitelist Tika content types reloaded") def process_tick(self, freq): """Every freq seconds you reload the keywords. """ super(Attachments, self).process_tick(freq) self._load_settings() def process(self, tup): try: sha256_random = tup.values[0] with_attachments = tup.values[1] # Remove all values self.attach.removeall() # Add the new values self.attach.extend(tup.values[2]) # Run analysis # self.attach.run() == self.attach() self.attach.run() except Error, e: self.raise_exception(e, tup) else:
def process(self, tup):
    """Tokenize one mail tuple and emit its parts on four streams.

    The mail is built with ``self._make_mail(tup)``. Depending on the
    enabled filters, sender ip, mail sha1 and attachments are compared
    with the values already analyzed and those collections are updated.

    Emits (only when no TypeError/UnicodeDecodeError was raised):
        * "network": ``[sha256_rand, sender_ip, is_filtered_net]``
        * "mail": ``[sha256_rand, mail, is_filtered_mail]``
        * "body": ``[sha256_rand, body, is_filtered_mail]``
        * "attachments": ``[sha256_rand, with_attachments, attachments]``

    Args:
        tup: incoming tuple, forwarded as is to ``self._make_mail``.
    """
    try:
        sha256_rand, mail = self._make_mail(tup)
        with_attachments = False
        attachments = []
        body = self.parser.body

        # If filter network is enabled.
        # The network flag is kept apart from the mail flag: with a single
        # shared variable the mail section reset it, so the "network"
        # stream never reported an already analyzed sender ip.
        is_filtered_net = False
        if self.filter_network_enabled:
            if mail["sender_ip"] in self._network_analyzed:
                is_filtered_net = True

            # Update database of analyzed ip addresses
            self._network_analyzed.append(mail["sender_ip"])

        # If filter mails is enabled
        is_filtered_mail = False
        if self.filter_mails_enabled:
            if mail["sha1"] in self._mails_analyzed:
                # Mail already analyzed: drop its body before emitting
                mail.pop("body", None)
                body = six.text_type()
                is_filtered_mail = True

            # Update database of analyzed mails
            self._mails_analyzed.append(mail["sha1"])

        # Emit only attachments
        raw_attach = self.parser.attachments_list

        if raw_attach:
            with_attachments = True
            attachments = MailAttachments.withhashes(raw_attach)

            # If filter attachments is enabled
            if self.filter_attachments_enabled:
                hashes = attachments.filter(self._attachments_analyzed)
                self._attachments_analyzed.extend(hashes)

    except (TypeError, UnicodeDecodeError):
        # Best effort kept from the original code: a mail raising one of
        # these errors is dropped and nothing is emitted for it.
        pass

    else:
        # Emit network
        self.emit(
            [sha256_rand, mail["sender_ip"], is_filtered_net],
            stream="network")

        # Emit mail
        self.emit([sha256_rand, mail, is_filtered_mail], stream="mail")

        # Emit body
        self.emit([sha256_rand, body, is_filtered_mail], stream="body")

        # Emit attachments
        self.emit(
            [sha256_rand, with_attachments, list(attachments)],
            stream="attachments")
class Attachments(AbstractBolt): outputs = ['sha256_random', 'with_attachments', 'attachments'] def initialize(self, stormconf, context): super(Attachments, self).initialize(stormconf, context) self.attach = MailAttachments() self._load_settings() def _load_settings(self): # Loading configuration settings = self._load_lists() self.attach.reload(**settings) def _load_lists(self): settings = copy.deepcopy(self.conf) for k in self.conf: for i, j in self.conf[k].get("lists", {}).items(): settings[k][i] = load_keywords_list(j) self.log("Loaded lists {!r} for {!r}".format(i, k), "debug") self.log( "Keys[{!r}][{!r}]: {}".format(k, i, ", ".join(settings[k][i])), "debug") else: return settings def process_tick(self, freq): """Every freq seconds you reload the keywords. """ super(Attachments, self).process_tick(freq) self._load_settings() def process(self, tup): try: sha256_random = tup.values[0] sha256 = sha256_random.split("_")[0] self.log("Processing started: {}".format(sha256)) with_attachments = tup.values[1] # Remove all values self.attach.removeall() # Add the new values self.attach.extend(tup.values[2]) # Run analysis # self.attach.run() == self.attach() self.attach.run() except BinAsciiError, e: self.raise_exception(e, tup) else:
def _get_urls(self, greedy_data):
    """Collect the whitelisted-filtered urls of a mail.

    Args:
        greedy_data: mapping holding the "tokenizer" and "attachments"
            entries; each entry is read by position.

    Returns:
        dict with an optional "body" key and an optional "attachments"
        key; a key is present only when urls were found for that part.
    """
    tokenizer = greedy_data["tokenizer"]
    found = {}

    # The body is searched only when the mail is not filtered
    if not tokenizer[2]:
        body_urls = text2urls_whitelisted(
            tokenizer[1], self.whitelists, self.faup)
        if body_urls:
            found["body"] = body_urls

    # Attachments are always searched: two mails can share the same body
    # and still carry different attachments
    payloads = MailAttachments(greedy_data["attachments"][2]).payloadstext()
    attachment_urls = text2urls_whitelisted(
        payloads, self.whitelists, self.faup)
    if attachment_urls:
        found["attachments"] = attachment_urls

    return found
def process(self, tup):
    """Tokenize one mail tuple and update the analyzed-items databases.

    The mail is built with ``self._make_mail(tup)``. Depending on the
    enabled filters, sender ip, mail sha1 and attachments are compared
    with the values already analyzed and those collections are updated.
    A TypeError is handed to ``self.raise_exception`` together with the
    tuple.

    NOTE(review): several locals (``body``, ``raw_mail``, ``mail_type``,
    the filter flags, ``with_attachments``) are not consumed in the
    visible code; presumably a following part of this method uses them.
    They are kept untouched.

    Args:
        tup: incoming tuple; ``tup.values[0]`` is read as the raw mail and
            ``tup.values[5]`` as the mail type.
    """
    try:
        sha256_rand, mail = self._make_mail(tup)
        # The part before the first "_" is used as the sha256 in the log
        sha256 = sha256_rand.split("_")[0]
        self.log("Processing started: {}".format(sha256))
        with_attachments = False
        attachments = []
        body = self.parser.body
        raw_mail = tup.values[0]
        mail_type = tup.values[5]

        # If filter network is enabled
        is_filtered_net = False
        if self.filter_network:
            if mail["sender_ip"] in self.analyzed_network:
                is_filtered_net = True

            # Update database ip addresses analyzed
            self.analyzed_network.append(mail["sender_ip"])

        # If filter mails is enabled
        is_filtered_mail = False
        if self.filter_mails:
            if mail["sha1"] in self.analyzed_mails:
                # Mail already analyzed: blank its body and raw content
                mail.pop("body", None)
                body = six.text_type()
                raw_mail = six.text_type()
                is_filtered_mail = True

            # Update database mails analyzed
            self.analyzed_mails.append(mail["sha1"])

        if self.parser.attachments:
            with_attachments = True
            attachments = MailAttachments.withhashes(
                self.parser.attachments)

            # If filter attachments is enabled
            if self.filter_attachments:
                hashes = attachments.filter(self.analyzed_attachments)
                self.analyzed_attachments.extend(hashes)

    # "except X as e" is valid on Python 2.6+ and Python 3, while the old
    # "except X, e" form is a SyntaxError on Python 3
    except TypeError as e:
        self.raise_exception(e, tup)
def process(self, tup):
    """Tokenize one mail tuple and update the analyzed-items databases.

    The mail is built with ``self._make_mail(tup)``. Depending on the
    enabled filters, sender ip, mail sha1 and attachments are compared
    with the values already analyzed and those collections are updated.
    A TypeError is handed to ``self.raise_exception`` together with the
    tuple.

    NOTE(review): ``is_filtered`` is reset to False before the mail
    filter, so the value set by the network filter is overwritten. No
    consumer of the flag is visible here; confirm whether the network
    result should be kept in its own variable.

    Args:
        tup: incoming tuple, forwarded as is to ``self._make_mail``.
    """
    try:
        sha256_rand, mail = self._make_mail(tup)
        with_attachments = False
        attachments = []
        body = self.parser.body

        # If filter network is enabled
        is_filtered = False
        if self.filter_network_enabled:
            if mail["sender_ip"] in self._network_analyzed:
                is_filtered = True

            # Update database of analyzed ip addresses
            self._network_analyzed.append(mail["sender_ip"])

        # If filter mails is enabled
        is_filtered = False
        if self.filter_mails_enabled:
            if mail["sha1"] in self._mails_analyzed:
                # Mail already analyzed: drop its body
                mail.pop("body", None)
                body = six.text_type()
                is_filtered = True

            # Update database of analyzed mails
            self._mails_analyzed.append(mail["sha1"])

        # Emit only attachments
        raw_attach = self.parser.attachments_list

        if raw_attach:
            with_attachments = True
            attachments = MailAttachments.withhashes(raw_attach)

            # If filter attachments is enabled
            if self.filter_attachments_enabled:
                hashes = attachments.filter(self._attachments_analyzed)
                self._attachments_analyzed.extend(hashes)

    # "except X as e" is valid on Python 2.6+ and Python 3, while the old
    # "except X, e" form is a SyntaxError on Python 3
    except TypeError as e:
        self.raise_exception(e, tup)
def process(self, tup):
    """Extract the urls found in the attachments text and emit them.

    Emits ``[sha256_mail_random, with_urls, urls]`` where the first item
    is ``tup.values[0]`` and the other two come from
    ``self._extract_urls``.

    Args:
        tup: incoming tuple; ``tup.values[2]`` is wrapped in a
            ``MailAttachments`` object to obtain the payloads text.
    """
    mail_id = tup.values[0]

    # Text of all payloads, as returned by MailAttachments.payloadstext()
    payloads_text = MailAttachments(tup.values[2]).payloadstext()

    has_urls, found_urls = self._extract_urls(payloads_text)
    self.emit([mail_id, has_urls, found_urls])
def initialize(self, stormconf, context):
    """Set up the bolt: parent initialization, analyzer, settings.

    Args:
        stormconf: forwarded unchanged to the parent ``initialize``.
        context: forwarded unchanged to the parent ``initialize``.
    """
    super(Attachments, self).initialize(stormconf, context)

    # The analyzer is stored on the instance before the settings are loaded
    analyzer = MailAttachments()
    self.attach = analyzer

    self._load_settings()