def _check_attachments(self, attachments, keywords):
    """Search attachment file names and decoded payloads for keywords.

    Every attachment contributes its "filename"; its base64 "payload" is
    decoded and contributes to the text content.  When an attachment has
    a truthy "is_archive" entry, the entries of its "files" list are used
    instead of its own payload (each one contributes "filename" and
    "payload" the same way).

    Args:
        attachments: iterable of dict-like attachments with the keys
            described above.
        keywords: passed unchanged to ``swt`` as the second argument.

    Returns:
        tuple: ``(swt(all_filenames, keywords), swt(all_contents,
        keywords))`` where both texts are newline-separated unicode
        strings.
    """
    filenames = []
    contents = []

    for attachment in attachments:
        try:
            # The u"\n" concatenation stays inside the try block on
            # purpose: it is part of what may raise for this attachment.
            filenames.append(attachment["filename"] + u"\n")

            if attachment.get("is_archive"):
                # .get("files") yields None when the key is missing;
                # treat that as an empty archive instead of letting the
                # loop raise an uncaught TypeError.
                for member in attachment.get("files") or ():
                    filenames.append(member["filename"] + u"\n")
                    contents.append(
                        member["payload"].decode('base64') + u"\n")
            else:
                contents.append(
                    attachment["payload"].decode('base64') + u"\n")
        except (KeyError, UnicodeDecodeError):
            # Best effort: whatever was collected before the failure is
            # kept, the rest of this attachment is skipped.
            # NOTE(review): UnicodeDecodeError presumably comes from
            # mixing a non-ASCII byte string with u"\n" under Python 2
            # -- confirm.
            continue

    return (swt(u"".join(filenames), keywords),
            swt(u"".join(contents), keywords))
def _search_phishing(self, greedy_data):
    """Score a mail against the configured keywords.

    The score held by ``self._pb`` is reset, then a property is set on
    it for every part of the mail in which a match is found: body, urls
    in body, from, attachment file names, attachment text, urls in
    attachments and subject.  Targets (keys of ``self._t_keys``) are
    collected from body, from and attachment matches only.

    Args:
        greedy_data: mapping with the keys 'tokenizer',
            'urls-handler-body', 'urls-handler-attachments' and
            'attachments'.  Index 1 of 'tokenizer' is the mail mapping;
            for the other keys index 1 is a truthy flag telling whether
            index 2 holds data.

    Returns:
        tuple: ``(self._pb.score, targets)`` where ``targets`` is a set.
    """
    # Reset phishing bitmap
    self._pb.reset_score()

    def flag(name):
        # Set the property only when score_properties does not already
        # contain it.
        if name not in self._pb.score_properties:
            self._pb.set_property_score(name)

    # Outputs
    targets = set()

    # Get Tokenizer
    mail = greedy_data['tokenizer'][1]
    body = mail.get('body')
    subject = mail.get('subject')
    from_ = mail.get('from')

    # Get Urls in body
    urls = None
    if greedy_data['urls-handler-body'][1]:
        urls = greedy_data['urls-handler-body'][2]

    # Get Urls attachments
    urls_attachments = None
    if greedy_data['urls-handler-attachments'][1]:
        urls_attachments = greedy_data['urls-handler-attachments'][2]

    # Get Attachments
    with_attachments = greedy_data['attachments'][1]
    attachments = None
    if with_attachments:
        attachments = greedy_data['attachments'][2]

    # Check body
    if body:
        for target, words in self._t_keys.iteritems():
            if swt(body, words):
                targets.add(target)
                flag("mail_body")

    # Check urls body
    # Target not added because urls come from body
    if urls:
        for _target, words in self._t_keys.iteritems():
            if ('urls_body' not in self._pb.score_properties and
                    self._check_urls(urls, words)):
                self._pb.set_property_score("urls_body")

    # Check from
    if from_:
        for target, words in self._t_keys.iteritems():
            if swt(from_, words):
                targets.add(target)
                flag("mail_from")

    # Check attachments filename and text match
    if with_attachments:
        for target, words in self._t_keys.iteritems():
            filename_match, text_match = \
                self._check_attachments(attachments, words)

            if filename_match or text_match:
                targets.add(target)

            if filename_match:
                flag("filename_attachments")

            if text_match:
                flag("text_attachments")

    # Check urls attachments
    # Target not added because urls come from attachments content
    if urls_attachments:
        for _target, words in self._t_keys.iteritems():
            if ('urls_attachments' not in self._pb.score_properties and
                    self._check_urls(urls_attachments, words)):
                self._pb.set_property_score("urls_attachments")

    # Check subject
    # mail.get('subject') is None when the mail has no subject; guard it
    # the same way body and from are guarded above.
    if subject and swt(subject, self._s_keys):
        flag("mail_subject")

    return self._pb.score, targets
def _check_urls(self, urls, keywords):
    """Tell whether at least one url matches the given keywords.

    Args:
        urls: mapping whose values are iterables of dict-like entries,
            each holding the url string under the 'url' key.
        keywords: passed unchanged to ``swt`` as the second argument.

    Returns:
        bool: True as soon as ``swt`` reports a match for one url,
        False when no url matches (including when ``urls`` is empty).
    """
    for _domain, details in urls.iteritems():
        for entry in details:
            if swt(entry['url'], keywords):
                return True

    # Explicit negative result instead of an implicit None.
    return False