def memory_urls(self, memory_urls):
    """Store the given URL or URLs, dropping any that are whitelisted."""
    self._memory_urls = []

    # A single value that is itself a URL is stored directly.
    candidate = str(memory_urls)
    if RegexHelpers.is_url(candidate):
        if not self.whitelister.is_url_whitelisted(candidate):
            self._memory_urls.append(candidate)
    else:
        # Otherwise treat the argument as an iterable of URLs.
        try:
            for item in memory_urls:
                url = str(item)
                if RegexHelpers.is_url(url) and not self.whitelister.is_url_whitelisted(url):
                    self._memory_urls.append(url)
        except TypeError:
            # Not iterable and not a URL: leave the list empty.
            pass
def dns_requests(self, dns_requests):
    """Keep only DnsRequest objects whose request/answer are not whitelisted."""
    self._dns_requests = []
    try:
        for req in dns_requests:
            if not isinstance(req, DnsRequest):
                continue
            if self.whitelister.is_domain_whitelisted(req.request):
                continue
            answer = req.answer
            if RegexHelpers.is_ip(answer):
                # IP answer: keep unless the answer IP is whitelisted.
                if not self.whitelister.is_ip_whitelisted(answer):
                    self._dns_requests.append(req)
            elif RegexHelpers.is_domain(answer):
                # Domain answer (CNAME-style): keep unless whitelisted.
                if not self.whitelister.is_domain_whitelisted(answer):
                    self._dns_requests.append(req)
            else:
                # No recognizable answer: keep the request anyway.
                self._dns_requests.append(req)
    except TypeError:
        # Argument was not iterable: leave the list empty.
        pass
def get_all_urls(self):
    """Return a sorted, de-duplicated list of every URL in this report."""
    urls = set()
    urls.update(self.process_tree_urls)
    urls.update(self.memory_urls)
    urls.update(self.strings_urls)

    # Rebuild full URLs out of the observed HTTP requests.
    for req in self.http_requests:
        candidate = "http://" + req.host + req.uri
        if RegexHelpers.is_url(candidate):
            urls.add(candidate)

    return sorted(urls)
def main():
    """Command-line entry point: print every URL found in the given file."""
    parser = argparse.ArgumentParser()
    # required=True makes a default value meaningless, so the redundant
    # default=None was removed.
    parser.add_argument('-f', '--file-path', action='store', dest='file_path',
                        required=True,
                        help="Path of file to use for URL extraction.")
    args = parser.parse_args()

    # Read as bytes so binary files can be scanned too.
    # NOTE(review): assumes RegexHelpers.find_urls accepts bytes -- confirm.
    with open(args.file_path, "rb") as b:
        urls = RegexHelpers.find_urls(b.read())

    for url in urls:
        print(url)
def update_phish_info(self, email_list): self.logger.debug("Updating Phish Information section.") # Create the parent div tag. div = self.new_tag("div") # Add the header tag. header = self.new_tag("h2", parent=div) header.string = "Phish E-mail Information" # Create a new table tag. table = self.new_tag("table", parent=div) # Set up the table header row. thead = self.new_tag("thead", parent=table) tr = self.new_tag("tr", parent=thead) titles = [ "URL", "Time", "From", "To", "Subject", "Attachments", "MD5 Hashes", "CC", "BCC", "Reply-To", "Message ID" ] for title in titles: th = self.new_tag("th", parent=tr) th.string = title # Set up the table body rows. tbody = self.new_tag("tbody", parent=table) for email in email_list: if isinstance(email, EmailParser.EmailParser): tr = self.new_tag("tr", parent=tbody) td = self.new_tag("td", parent=tr) if RegexHelpers.is_url(email.reference): link = self.new_tag("a", parent=td) link["href"] = email.reference link.string = "Alert" td = self.new_tag("td", parent=tr) td.string = email.received_time td = self.new_tag("td", parent=tr) td.string = email.from_address td = self.new_tag("td", parent=tr) td.string = email.to_string td = self.new_tag("td", parent=tr) if email.decoded_subject: td.string = email.decoded_subject else: td.string = email.subject td = self.new_tag("td", parent=tr) td.string = email.attachments_string td = self.new_tag("td", parent=tr) td.string = email.md5_string td = self.new_tag("td", parent=tr) td.string = email.cc_string td = self.new_tag("td", parent=tr) td.string = email.bcc_string td = self.new_tag("td", parent=tr) td.string = email.replyto td = self.new_tag("td", parent=tr) td.string = email.message_id self.update_section(div, old_section_id="phish_email_information")
def sha512(self, sha512):
    """Store the value only if it is a valid SHA512 string, else ""."""
    value = str(sha512)
    self._sha512 = value if RegexHelpers.is_sha512(value) else ""
def extract_indicators(self, check_whitelist=True):
    """Build Indicator objects for every IOC found in this sandbox report.

    Covers the sample's hashes, contacted hosts, DNS requests, dropped
    files, HTTP requests, URLs (memory/strings/process tree), and mutexes.
    Results are appended to self.iocs, optionally run through the
    whitelists, and then de-duplicated.

    Args:
        check_whitelist: When True, run the collected IOCs through
            Indicator.run_whitelist() before merging duplicates.
    """
    # Make an Indicator for the sample's MD5 hash.
    if RegexHelpers.is_md5(self.md5):
        try:
            ind = Indicator.Indicator(self.md5, "Hash - MD5")
            ind.add_tags("sandboxed_sample")
            self.iocs.append(ind)
        except ValueError:
            pass

    # Make an Indicator for the sample's SHA1 hash.
    if RegexHelpers.is_sha1(self.sha1):
        try:
            ind = Indicator.Indicator(self.sha1, "Hash - SHA1")
            ind.add_tags("sandboxed_sample")
            self.iocs.append(ind)
        except ValueError:
            pass

    # Make an Indicator for the sample's SHA256 hash.
    if RegexHelpers.is_sha256(self.sha256):
        try:
            ind = Indicator.Indicator(self.sha256, "Hash - SHA256")
            ind.add_tags("sandboxed_sample")
            self.iocs.append(ind)
        except ValueError:
            pass

    # Make Indicators for any contacted hosts.
    for host in self.contacted_hosts:
        # Make an Indicator for the IP itself.
        if RegexHelpers.is_ip(host.ipv4):
            try:
                ind = Indicator.Indicator(host.ipv4, "Address - ipv4-addr")
                ind.add_tags("contacted_host")
                if host.protocol and host.port:
                    ind.add_tags(host.protocol + " " + host.port)
                elif host.protocol and not host.port:
                    # BUG FIX: this branch previously called
                    # "indicator.add_tag(...)" -- an undefined name and a
                    # nonexistent method. Tag the current indicator instead.
                    ind.add_tags(host.protocol)
                self.iocs.append(ind)
            except ValueError:
                pass

        # Make Indicators for any associated domains.
        for domain in host.associated_domains:
            if RegexHelpers.is_domain(domain["domain"]):
                try:
                    ind = Indicator.Indicator(domain["domain"], "URI - Domain Name")
                    ind.add_tags("associated_to_" + host.ipv4)
                    ind.add_relationships(host.ipv4)
                    self.iocs.append(ind)
                except ValueError:
                    pass

    # Make Indicators for any DNS requests.
    for request in self.dns_requests:
        # Make an Indicator for the requested domain.
        if RegexHelpers.is_domain(request.request):
            try:
                ind = Indicator.Indicator(request.request, "URI - Domain Name")
                ind.add_tags("dns_request")
                # If the DNS answer is an IP, add a tag for it and
                # also create an Indicator for it.
                if RegexHelpers.is_ip(request.answer):
                    ind.add_tags(request.answer)
                    try:
                        ip_ind = Indicator.Indicator(request.answer, "Address - ipv4-addr")
                        ip_ind.add_tags(["dns_response", request.request])
                        self.iocs.append(ip_ind)
                    except ValueError:
                        pass
                self.iocs.append(ind)
            except ValueError:
                pass

    # Make Indicators for any dropped files.
    # TODO: Add back in the ability to only make Indicators for "interesting"
    # dropped files, based on file type or file extension.
    for file in self.dropped_files:
        # Make an Indicator for the file path.
        try:
            ind = Indicator.Indicator(file.path, "Windows - FilePath")
            ind.add_tags("dropped_file")
            ind.add_relationships(file.filename)
            self.iocs.append(ind)
        except ValueError:
            pass

        # Make an Indicator for the file name.
        try:
            ind = Indicator.Indicator(file.filename, "Windows - FileName")
            ind.add_tags("dropped_file")
            ind.add_relationships([file.path, file.md5, file.sha1, file.sha256])
            self.iocs.append(ind)
        except ValueError:
            pass

        # Make an Indicator for the MD5 hash.
        if RegexHelpers.is_md5(file.md5):
            try:
                ind = Indicator.Indicator(file.md5, "Hash - MD5")
                ind.add_tags([file.filename, "dropped_file"])
                ind.add_relationships([file.filename, file.path, file.sha1, file.sha256])
                self.iocs.append(ind)
            except ValueError:
                pass

        # Make an Indicator for the SHA1 hash.
        if RegexHelpers.is_sha1(file.sha1):
            try:
                ind = Indicator.Indicator(file.sha1, "Hash - SHA1")
                ind.add_tags([file.filename, "dropped_file"])
                ind.add_relationships([file.filename, file.path, file.md5, file.sha256])
                self.iocs.append(ind)
            except ValueError:
                pass

        # Make an Indicator for the SHA256 hash.
        if RegexHelpers.is_sha256(file.sha256):
            try:
                ind = Indicator.Indicator(file.sha256, "Hash - SHA256")
                ind.add_tags([file.filename, "dropped_file"])
                ind.add_relationships([file.filename, file.path, file.md5, file.sha1])
                self.iocs.append(ind)
            except ValueError:
                pass

    # Make Indicators for any HTTP requests.
    for request in self.http_requests:
        # Check if the host is a domain or IP.
        if RegexHelpers.is_ip(request.host):
            indicator_type = "Address - ipv4-addr"
        # Otherwise it must be a domain.
        else:
            indicator_type = "URI - Domain Name"

        # Make an Indicator for the host.
        try:
            ind = Indicator.Indicator(request.host, indicator_type)
            ind.add_tags(["http_request", request.method])
            if request.method == "POST":
                ind.add_tags("c2")
            self.iocs.append(ind)
        except ValueError:
            pass

        # Make an Indicator for the URI path.
        if request.uri != "/":
            try:
                ind = Indicator.Indicator(request.uri, "URI - Path")
                ind.add_tags(["http_request", request.method, request.host])
                if request.method == "POST":
                    ind.add_tags("c2")
                ind.add_relationships(request.host)
                self.iocs.append(ind)
            except ValueError:
                pass

        # Make an Indicator for the full URL.
        try:
            url = "http://" + request.host + request.uri
            ind = Indicator.Indicator(url, "URI - URL")
            ind.add_tags(["http_request", request.method])
            if request.method == "POST":
                ind.add_tags("c2")
            ind.add_relationships([request.host, request.uri])
            self.iocs.append(ind)
        except ValueError:
            pass

        # Make an Indicator for the User-Agent.
        try:
            ind = Indicator.Indicator(request.user_agent, "URI - HTTP - UserAgent")
            ind.add_tags(["http_request", request.method, request.host])
            if request.method == "POST":
                ind.add_tags("c2")
            ind.add_relationships([request.host, request.uri])
            self.iocs.append(ind)
        except ValueError:
            pass

    # Make Indicators for any memory URLs. Currently, only VxStream
    # has this memory URL feature.
    indicator_list = Indicator.generate_url_indicators(self.memory_urls)
    for ind in indicator_list:
        ind.add_tags("url_in_memory")
        self.iocs.append(ind)

    # Make Indicators for any URLs found in the sample's strings.
    indicator_list = Indicator.generate_url_indicators(self.strings_urls)
    for ind in indicator_list:
        ind.add_tags("url_in_strings")
        self.iocs.append(ind)

    # Make Indicators for any URLs found in the sample's process tree.
    indicator_list = Indicator.generate_url_indicators(self.process_tree_urls)
    for ind in indicator_list:
        ind.add_tags("url_in_process_tree")
        self.iocs.append(ind)

    # Make Indicators for any mutexes.
    for mutex in self.mutexes:
        try:
            ind = Indicator.Indicator(mutex, "Windows - Mutex")
            ind.add_tags("mutex_created")
            self.iocs.append(ind)
        except ValueError:
            pass

    # Run the IOCs through the whitelists if requested.
    if check_whitelist:
        self.iocs = Indicator.run_whitelist(self.iocs)

    # Finally merge the IOCs so we don't have any duplicates.
    self.iocs = Indicator.merge_duplicate_indicators(self.iocs)
def __init__(self, smtp_path=None, smtp_text=None, attached_email=True, check_whitelist=True):
    """Parse an SMTP stream or raw e-mail text and extract headers, bodies,
    attachments, URLs, and IOC Indicators.

    Args:
        smtp_path: Path to an "smtp.stream" (or "smtp.email") file. Takes
            precedence over smtp_text when both are given.
        smtp_text: The raw SMTP/e-mail text, used when no path is given.
        attached_email: When True, look for an embedded message/rfc822 part
            (e.g. Exchange journaling) and parse that instead.
        check_whitelist: When True, run the collected IOCs through
            Indicator.run_whitelist() before merging duplicates.

    Raises:
        ValueError: If neither smtp_path nor smtp_text is given, or if
            smtp_path is given but does not exist.
    """
    # Check that we got at least an SMTP path or text:
    if not smtp_path and not smtp_text:
        raise ValueError(
            "You must specify either an SMTP path or the SMTP text.")

    # In case we received both, default to use the smtp_path over the smtp_text.
    if smtp_path:
        # Read the SMTP file. This works with the "smtp.stream" file or in theory
        # an "smtp.email" type file with the SMTP commands removed.
        if os.path.exists(smtp_path):
            self.path = smtp_path
            self.name = os.path.basename(smtp_path)
            with open(self.path) as s:
                smtp_stream = s.read().splitlines()
        else:
            # BUG FIX: previously smtp_stream was left unbound here, causing
            # a confusing NameError later. Fail fast instead.
            raise ValueError("SMTP path does not exist: " + smtp_path)
    else:
        smtp_stream = smtp_text.splitlines()
        # path and name are here just for completeness for anything
        # external that might rely on them.
        self.path = ""
        self.name = hashlib.md5(smtp_text.encode('utf-8')).hexdigest()

    # A place to store the IOCs.
    self.iocs = []

    # Where did this alert come from? This could be anything, such as
    # a URL to an ACE alert or whatever other reference you want.
    self.reference = ""

    # Find the envelope from/to addresses. This will only work if given an
    # "smtp.stream" file, since otherwise the SMTP commands will not exist.
    self.envelope_from = ""
    self.envelope_to = ""
    envelope_address_pattern = re.compile(r'.*<(.*)>.*')
    for line in smtp_stream:
        if line.startswith("MAIL FROM:"):
            try:
                # Make an Indicator for the address.
                self.envelope_from = envelope_address_pattern.match(
                    line).group(1)
                try:
                    ind = Indicator.Indicator(self.envelope_from,
                                              "Email - Address")
                    ind.add_tags(["phish", "envelope_from_address"])
                    self.iocs.append(ind)
                except ValueError:
                    pass
            except AttributeError:
                self.envelope_from = ""
        if line.startswith("RCPT TO:"):
            try:
                # Make an Indicator for the address.
                self.envelope_to = envelope_address_pattern.match(
                    line).group(1)
                try:
                    # BUG FIX: this Indicator was previously built from
                    # self.envelope_from even though it is tagged as the
                    # envelope "to" address.
                    ind = Indicator.Indicator(self.envelope_to,
                                              "Email - Address")
                    ind.add_tags(["phish", "envelope_to_address"])
                    self.iocs.append(ind)
                except ValueError:
                    pass
            except AttributeError:
                self.envelope_to = ""

    # Just in case we are dealing with an "smtp.stream" file that still has
    # the SMTP commands above the actual e-mail, we need to strip those out.
    # This will remove all lines prior to the Received: headers so that the
    # email.parser can properly parse out the e-mail. If we were given an
    # "smtp.email" type of file with the SMTP commands already removed, this
    # should not affect anything. The "smtp_stream and" guard prevents an
    # IndexError when the input has no Received: header at all.
    while smtp_stream and not smtp_stream[0].startswith("Received:"):
        smtp_stream.pop(0)

    # Join the header lines into a single string.
    email_text = "\n".join(smtp_stream)

    # Create the e-mail object.
    self._email_obj = email.message_from_string(email_text)

    # If we want to try and parse an embedded/attached e-mail instead...
    # (Exchange journaling sends us the e-mail embedded as an attachment
    # within another e-mail.)
    if attached_email:
        # Walk the full e-mail's parts.
        for part in self._email_obj.walk():
            # Continue if the part looks like a valid e-mail.
            if part.get_content_type() == "message/rfc822":
                # Split the part lines into a list.
                part_text = str(part).splitlines()
                # Make sure our part starts with the Received: headers.
                while part_text and not part_text[0].startswith("Received:"):
                    part_text.pop(0)
                part_text = "\n".join(part_text)
                # Make the new e-mail object.
                self._email_obj = email.message_from_string(part_text)

    # Parse the e-mail object for its content.
    parsed_email = self._parse_content()

    # Now that we have the e-mail object, parse out some of the interesting parts.
    self.urls = set()
    self.headers = self._get_all_headers_string()

    # Make Indicators for the received headers (SMTP relays).
    self.received = self.get_header("received")
    for hop in self.received:
        for ip in RegexHelpers.find_ip_addresses(hop):
            try:
                ind = Indicator.Indicator(ip, "Address - ipv4-addr")
                ind.add_tags(["phish", "smtp_relay"])
                # We consider SMTP relay indicators benign... Don't want to alert every time
                # we see the relay sending an e-mail, but it's nice to know for correlation.
                ind.make_benign()
                self.iocs.append(ind)
            except ValueError:
                pass
        for domain in RegexHelpers.find_domains(hop):
            if isinstance(domain, tuple):
                try:
                    ind = Indicator.Indicator(domain[0], "URI - Domain Name")
                    ind.add_tags(["phish", "smtp_relay"])
                    ind.make_benign()
                    self.iocs.append(ind)
                except ValueError:
                    pass

    # Get the e-mail's plaintext body, HTML body, and the visible text from the HTML.
    self.body = parsed_email["body"]
    self.html = parsed_email["html"]
    if self.html:
        soup = BeautifulSoup(self.html, "html.parser")
        self.visible_html = "".join(soup.findAll(text=True))
    else:
        self.visible_html = ""

    # Get any e-mail attachments.
    self.attachments = parsed_email["attachments"]
    self.attachments_string = ", ".join(
        [attach["name"] for attach in self.attachments])
    self.md5_string = ", ".join(
        [attach["md5"] for attach in self.attachments])

    # Make an Indicator for the from address.
    try:
        self.from_address = self._get_address_list("from")[0][1]
        try:
            ind = Indicator.Indicator(self.from_address, "Email - Address")
            ind.add_tags(["phish", "from_address"])
            self.iocs.append(ind)
        except ValueError:
            pass
    except IndexError:
        self.from_address = ""

    # Make an Indicator for the reply-to address.
    try:
        self.replyto = self._get_address_list("reply-to")[0][1]
        try:
            ind = Indicator.Indicator(self.replyto, "Email - Address")
            ind.add_tags(["phish", "replyto_address"])
            if self.from_address:
                ind.add_relationships(self.from_address)
            self.iocs.append(ind)
        except ValueError:
            pass
    except IndexError:
        self.replyto = ""

    # Make an Indicator for the subject.
    try:
        self.subject = "".join(self.get_header("subject")[0].splitlines())
        try:
            ind = Indicator.Indicator(self.subject, "Email - Subject")
            ind.add_tags(["phish", "subject"])
            if self.from_address:
                ind.add_relationships(self.from_address)
            self.iocs.append(ind)
        except ValueError:
            pass
    except IndexError:
        self.subject = ""

    # Try and decode the subject and make an Indicator.
    try:
        self.decoded_subject = "".join(
            str(make_header(decode_header(
                self.get_header("subject")[0]))).splitlines())
        try:
            ind = Indicator.Indicator(self.decoded_subject, "Email - Subject")
            ind.add_tags(["phish", "decoded_subject"])
            if self.from_address:
                ind.add_relationships(self.from_address)
            self.iocs.append(ind)
        except ValueError:
            pass
    except IndexError:
        self.decoded_subject = ""

    # Make an Indicator for each to address.
    self.to_list = [x[1] for x in self._get_address_list("to")]
    self.to_string = ", ".join(self.to_list).replace("\t", " ")
    for address in self.to_list:
        try:
            ind = Indicator.Indicator(address, "Email - Address")
            ind.add_tags(["phish", "to_address"])
            if self.from_address:
                ind.add_relationships(self.from_address)
            self.iocs.append(ind)
        except ValueError:
            pass

    # Make an Indicator for each CC address.
    self.cc_list = [x[1] for x in self._get_address_list("cc")]
    self.cc_string = ", ".join(self.cc_list).replace("\t", " ")
    for address in self.cc_list:
        try:
            ind = Indicator.Indicator(address, "Email - Address")
            ind.add_tags(["phish", "cc_address"])
            if self.from_address:
                ind.add_relationships(self.from_address)
            self.iocs.append(ind)
        except ValueError:
            pass

    # Make an Indicator for each BCC address.
    self.bcc_list = [x[1] for x in self._get_address_list("bcc")]
    self.bcc_string = ", ".join(self.bcc_list).replace("\t", " ")
    for address in self.bcc_list:
        try:
            ind = Indicator.Indicator(address, "Email - Address")
            ind.add_tags(["phish", "bcc_address"])
            if self.from_address:
                ind.add_relationships(self.from_address)
            self.iocs.append(ind)
        except ValueError:
            pass

    # Make an Indicator for the message ID.
    try:
        self.message_id = self.get_header("message-id")[0]
        try:
            ind = Indicator.Indicator(self.message_id, "Email Message ID")
            ind.add_tags(["phish", "message_id"])
            if self.from_address:
                ind.add_relationships(self.from_address)
            self.iocs.append(ind)
        except ValueError:
            pass
    except IndexError:
        self.message_id = ""

    # Make an Indicator for the x-mailer.
    try:
        self.x_mailer = self.get_header("x-mailer")[0]
        try:
            ind = Indicator.Indicator(self.x_mailer, "Email - Xmailer")
            ind.add_tags(["phish", "x-mailer"])
            if self.from_address:
                ind.add_relationships(self.from_address)
            self.iocs.append(ind)
        except ValueError:
            pass
    except IndexError:
        self.x_mailer = ""

    # Make an Indicator for the x-original-sender.
    try:
        self.x_original_sender = self.get_header("x-original-sender")[0]
        try:
            # BUG FIX: this Indicator was previously built from the stale
            # "address" loop variable instead of the x-original-sender value.
            ind = Indicator.Indicator(self.x_original_sender, "Email - Address")
            ind.add_tags(["phish", "x-original-sender"])
            if self.from_address:
                ind.add_relationships(self.from_address)
            self.iocs.append(ind)
        except ValueError:
            pass
    except IndexError:
        self.x_original_sender = ""

    # Make an Indicator for the x-originating-ip.
    try:
        x_originating_ip = self.get_header("x-originating-ip")[0]
        # Sometimes this field is in the form: [1.1.1.1]
        # Make sure we remove any non-IP characters.
        ip = RegexHelpers.find_ip_addresses(x_originating_ip)
        if ip:
            self.x_originating_ip = ip[0]
            try:
                ind = Indicator.Indicator(self.x_originating_ip,
                                          "Address - ipv4-addr")
                ind.add_tags(["phish", "x-originating-ip"])
                if self.from_address:
                    ind.add_relationships(self.from_address)
                self.iocs.append(ind)
            except ValueError:
                pass
    except IndexError:
        self.x_originating_ip = ""

    # Make an Indicator for the x-sender-ip.
    try:
        x_sender_ip = self.get_header("x-sender-ip")[0]
        # Make sure like the X-Originating-IP that we only
        # get the IP address and no other characters.
        ip = RegexHelpers.find_ip_addresses(x_sender_ip)
        if ip:
            self.x_sender_ip = ip[0]
            try:
                ind = Indicator.Indicator(self.x_sender_ip,
                                          "Address - ipv4-addr")
                ind.add_tags(["phish", "x-sender-ip"])
                if self.from_address:
                    ind.add_relationships(self.from_address)
                self.iocs.append(ind)
            except ValueError:
                pass
    except IndexError:
        self.x_sender_ip = ""

    # Make Indicators for any URLs in the plaintext body.
    # Indicator.generate_url_indicators() catches its own exceptions.
    text_urls = RegexHelpers.find_urls(self.body)
    text_urls_indicators = Indicator.generate_url_indicators(text_urls)
    for ind in text_urls_indicators:
        ind.add_tags(["phish", "plaintext_body"])
        if self.from_address:
            ind.add_relationships(self.from_address)
        self.iocs.append(ind)

    # Make Indicators for any URLs in the HTML body.
    # BUG FIX: removed a stray duplicate RegexHelpers.find_urls(self.html)
    # call whose result was discarded.
    html_urls = RegexHelpers.find_urls(self.html)
    html_urls_indicators = Indicator.generate_url_indicators(html_urls)
    for ind in html_urls_indicators:
        ind.add_tags(["phish", "html_body"])
        if self.from_address:
            ind.add_relationships(self.from_address)
        self.iocs.append(ind)

    # Make Indicators for any URLs in the visible text HTML body.
    visible_html_urls = RegexHelpers.find_urls(self.visible_html)
    visible_html_urls_indicators = Indicator.generate_url_indicators(
        visible_html_urls)
    for ind in visible_html_urls_indicators:
        ind.add_tags(["phish", "visible_html_body"])
        if self.from_address:
            ind.add_relationships(self.from_address)
        self.iocs.append(ind)

    # Make Indicators for different attachment attributes.
    all_urls = text_urls + html_urls + visible_html_urls
    for file in self.attachments:
        # Make Indicators for any strings URLs.
        if "strings_urls" in file:
            attachment_strings_urls_indicators = Indicator.generate_url_indicators(
                file["strings_urls"])
            for ind in attachment_strings_urls_indicators:
                ind.add_tags(["phish", "strings_url", file["name"]])
                if self.from_address:
                    ind.add_relationships(
                        [self.from_address, file["name"]])
                self.iocs.append(ind)
            all_urls += file["strings_urls"]

        # Make an Indicator for the filename.
        if file["name"]:
            try:
                ind = Indicator.Indicator(file["name"], "Windows - FileName")
                ind.add_tags(["phish", "attachment"])
                if self.from_address:
                    ind.add_relationships(self.from_address)
                if file["md5"]:
                    ind.add_relationships(file["md5"])
                if file["sha256"]:
                    ind.add_relationships(file["sha256"])
                self.iocs.append(ind)
            except ValueError:
                pass

        # Make an Indicator for the MD5 hash.
        if file["md5"]:
            try:
                ind = Indicator.Indicator(file["md5"], "Hash - MD5")
                ind.add_tags(["phish", "attachment"])
                if self.from_address:
                    ind.add_relationships(self.from_address)
                if file["name"]:
                    ind.add_tags(file["name"])
                    ind.add_relationships(file["name"])
                if file["sha256"]:
                    ind.add_relationships(file["sha256"])
                self.iocs.append(ind)
            except ValueError:
                pass

        # Make an Indicator for the SHA256 hash.
        if file["sha256"]:
            try:
                ind = Indicator.Indicator(file["sha256"], "Hash - SHA256")
                ind.add_tags(["phish", "attachment"])
                if self.from_address:
                    ind.add_relationships(self.from_address)
                if file["name"]:
                    ind.add_tags(file["name"])
                    ind.add_relationships(file["name"])
                if file["md5"]:
                    ind.add_relationships(file["md5"])
                self.iocs.append(ind)
            except ValueError:
                pass

    # Parse the URLs and prevent "duplicate" URLs
    # like http://blah.com/ and http://blah.com
    for url in all_urls:
        # Strip off the ending slash if it's there.
        if url.endswith("/"):
            url = url[:-1]
        self.urls.add(url)

    self.received_time = self._get_received_time()

    # Run the IOCs through the whitelists if requested.
    if check_whitelist:
        self.iocs = Indicator.run_whitelist(self.iocs)

    # Finally merge the IOCs so we don't have any duplicates.
    self.iocs = Indicator.merge_duplicate_indicators(self.iocs)
def md5(self, md5):
    """Store the value only if it is a valid MD5 string, else ""."""
    value = str(md5)
    self._md5 = value if RegexHelpers.is_md5(value) else ""
def ipv4(self, ip):
    """Store the value only if it looks like an IP address, else ""."""
    self.__ipv4 = ip if RegexHelpers.is_ip(ip) else ""
def sha512(self, hash):
    """Store the value only if it is a valid SHA512 string, else ""."""
    self.__sha512 = hash if RegexHelpers.is_sha512(hash) else ""
def sha256(self, hash):
    """Store the value only if it is a valid SHA256 string, else ""."""
    self.__sha256 = hash if RegexHelpers.is_sha256(hash) else ""
def sha1(self, hash):
    """Store the value only if it is a valid SHA1 string, else ""."""
    self.__sha1 = hash if RegexHelpers.is_sha1(hash) else ""
def md5(self, hash):
    """Store the value only if it is a valid MD5 string, else ""."""
    self.__md5 = hash if RegexHelpers.is_md5(hash) else ""
def update_sandbox_analysis(self, sandbox_dict):
    """Rebuild the "Sandbox Analysis" section of the page.

    For each sample hash in sandbox_dict (hash -> list of sandbox reports),
    builds sub-sections for sandbox URLs, screenshots, mutexes, dropped
    files, DNS requests, HTTP requests, contacted hosts, and process trees,
    then replaces the existing "sandbox_analysis" section with the result.
    """
    self.logger.debug("Updating Sandbox Analysis section.")

    # Get a working copy of the sandbox analysis section.
    #sandbox_analysis = self.get_section("sandbox_analysis")

    # Create the parent div tag.
    div = self.new_tag("div")

    # Continue if we were given a sandbox dictionary.
    if sandbox_dict:
        # Add the header tag.
        header = self.new_tag("h2", parent=div)
        header.string = "Sandbox Analysis"

        for hash in sandbox_dict:
            # Get a single deduped version of the reports.
            dedup_report = BaseSandboxParser.dedup_reports(
                sandbox_dict[hash])

            # Add a header for the sample's filename.
            header = self.new_tag("h3", parent=div)
            header.string = dedup_report.filename

            ####################
            ##                ##
            ##  SANDBOX URLS  ##
            ##                ##
            ####################
            self.logger.debug("Updating sandbox URLs for " + hash)

            # Make the new sub-section.
            sandbox_urls_section_id = "sandbox_urls_" + hash
            sandbox_urls_section = self.make_section(
                sandbox_urls_section_id, parent=div)

            # Create a new parent div for the sub-section.
            sandbox_urls_div = self.new_tag("div")

            # Add a header tag for the URLs.
            header = self.new_tag("h4", parent=sandbox_urls_div)
            header.string = "Sandbox URLs"

            # Add an unordered list for the reports.
            ul = self.new_tag("ul", parent=sandbox_urls_div)

            # Add list items for each report.
            for report in sandbox_dict[hash]:
                li = self.new_tag("li", parent=ul)
                li.string = report.sandbox_display_name + " = "
                link = self.new_tag("a", parent=li)
                link["href"] = report.sandbox_url
                link.string = report.filename

            # Add a VirusTotal lookup link when we know the sample's SHA256.
            if sandbox_dict[hash][0].sha256:
                li = self.new_tag("li", parent=ul)
                link = self.new_tag("a", parent=li)
                link[
                    "href"] = "https://virustotal.com/en/file/" + sandbox_dict[
                        hash][0].sha256 + "/analysis/"
                link.string = "VirusTotal"

            # Update the sub-section.
            self.update_section(sandbox_urls_div,
                                old_section_soup=sandbox_urls_section)

            ###################
            ##               ##
            ##  SCREENSHOTS  ##
            ##               ##
            ###################
            # Only continue if there are actually some screenshots.
            if any(report.screenshot_path
                   for report in sandbox_dict[hash]):
                self.logger.debug("Updating screenshots for " + hash)

                # Make the new sub-section.
                screenshot_section_id = "screenshot_" + hash
                screenshot_section = self.make_section(
                    screenshot_section_id, parent=div)

                # Create a new parent div for the sub-section.
                screenshots_div = self.new_tag("div")

                # Add a header tag for the screenshots.
                header = self.new_tag("h4", parent=screenshots_div)
                header.string = "Screenshots"

                for report in sandbox_dict[hash]:
                    if report.screenshot_path:
                        screenshot_name = os.path.basename(
                            report.screenshot_path)

                        # Upload the screenshot as an attachment if it doesn't already exist.
                        if not self.attachment_exists(screenshot_name):
                            self.attach_file(report.screenshot_path)

                        # If the screenshot attachment exists, add an img tag for it.
                        if self.attachment_exists(screenshot_name):
                            title_p = self.new_tag("p",
                                                   parent=screenshots_div)
                            title_p[
                                "style"] = "color:#009000; font-weight:bold;"
                            title_p.string = report.sandbox_display_name + " - " + report.sandbox_vm_name

                            img_p = self.new_tag("p", parent=screenshots_div)
                            img = self.new_tag("img", parent=img_p)
                            img["width"] = "1000"
                            img["height"] = "562"
                            src = "/download/attachments/" + str(
                                self.get_page_id()
                            ) + "/" + screenshot_name + "?effects=border-simple,blur-border,tape"
                            img["src"] = src

                self.update_section(screenshots_div,
                                    old_section_soup=screenshot_section)

            ###############
            ##           ##
            ##  MUTEXES  ##
            ##           ##
            ###############
            # Only continue if there are actually some mutexes.
            if dedup_report.mutexes:
                self.logger.debug("Updating mutexes for " + hash)

                # Make the new sub-section.
                mutexes_section_id = "mutexes_" + hash
                mutex_section = self.make_section(mutexes_section_id,
                                                  parent=div)

                # Create a new parent div for the sub-section.
                mutexes_div = self.new_tag("div")

                # Add a header tag for the mutexes.
                header = self.new_tag("h4", parent=mutexes_div)
                header.string = "Mutexes"

                # Add a pre tag to hold them.
                pre = self.new_tag("pre", parent=mutexes_div)
                pre["style"] = "border:1px solid gray;padding:5px;"
                pre.string = ""
                for mutex in dedup_report.mutexes:
                    pre.string += mutex + "\n"

                self.update_section(mutexes_div,
                                    old_section_soup=mutex_section)

            #####################
            ##                 ##
            ##  DROPPED FILES  ##
            ##                 ##
            #####################
            # Only continue if there are actually any dropped files.
            if dedup_report.dropped_files:
                self.logger.debug("Updating dropped files for " + hash)

                # Make the new sub-section.
                dropped_section_id = "dropped_" + hash
                dropped_section = self.make_section(dropped_section_id,
                                                    parent=div)

                # Create a new parent div for the sub-section.
                dropped_div = self.new_tag("div")

                # Add a header tag for the dropped files.
                header = self.new_tag("h4", parent=dropped_div)
                header.string = "Dropped Files"

                # Create a new table tag.
                table = self.new_tag("table", parent=dropped_div)

                # Set up the table header row.
                thead = self.new_tag("thead", parent=table)
                tr = self.new_tag("tr", parent=thead)
                titles = [
                    "VirusTotal", "Filename", "Path", "Size", "Type", "MD5",
                    "SHA256"
                ]
                for title in titles:
                    th = self.new_tag("th", parent=tr)
                    th.string = title

                # Set up the table body rows.
                tbody = self.new_tag("tbody", parent=table)
                for file in dedup_report.dropped_files:
                    tr = self.new_tag("tr", parent=tbody)

                    # VirusTotal link column (only when a SHA256 is known).
                    td = self.new_tag("td", parent=tr)
                    if file.sha256:
                        url = self.new_tag("a", parent=td)
                        vt_url = "https://virustotal.com/en/file/" + file.sha256 + "/analysis/"
                        url["href"] = vt_url
                        url.string = "VT"

                    td = self.new_tag("td", parent=tr)
                    td.string = file.filename

                    td = self.new_tag("td", parent=tr)
                    td.string = file.path

                    td = self.new_tag("td", parent=tr)
                    td.string = file.size

                    td = self.new_tag("td", parent=tr)
                    td.string = file.type

                    td = self.new_tag("td", parent=tr)
                    td.string = file.md5

                    td = self.new_tag("td", parent=tr)
                    td.string = file.sha256

                # Update the sub-section.
                self.update_section(dropped_div,
                                    old_section_soup=dropped_section)

            ####################
            ##                ##
            ##  DNS REQUESTS  ##
            ##                ##
            ####################
            # Only continue if there are actually any DNS requests.
            if dedup_report.dns_requests:
                self.logger.debug("Updating DNS requests for " + hash)

                # Make the new sub-section.
                dns_section_id = "dns_" + hash
                dns_section = self.make_section(dns_section_id, parent=div)

                # Create a new parent div for the sub-section.
                dns_div = self.new_tag("div")

                # Add a header tag for the DNS requests.
                header = self.new_tag("h4", parent=dns_div)
                header.string = "DNS Requests"

                # Create a new table tag.
                table = self.new_tag("table", parent=dns_div)

                # Set up the table header row.
                thead = self.new_tag("thead", parent=table)
                tr = self.new_tag("tr", parent=thead)
                titles = [
                    "VirusTotal", "Request", "Type", "VirusTotal", "Answer",
                    "Answer Type"
                ]
                for title in titles:
                    th = self.new_tag("th", parent=tr)
                    th.string = title

                # Set up the table body rows.
                tbody = self.new_tag("tbody", parent=table)
                for request in dedup_report.dns_requests:
                    tr = self.new_tag("tr", parent=tbody)

                    # VirusTotal link for the requested domain.
                    td = self.new_tag("td", parent=tr)
                    url = self.new_tag("a", parent=td)
                    vt_url = "https://virustotal.com/en/domain/" + request.request + "/information/"
                    url["href"] = vt_url
                    url.string = "VT"

                    td = self.new_tag("td", parent=tr)
                    td.string = request.request

                    td = self.new_tag("td", parent=tr)
                    td.string = request.type

                    # VirusTotal link for the answer: IP answers get the
                    # ip-address URL form, anything else the domain form.
                    td = self.new_tag("td", parent=tr)
                    if request.answer:
                        if RegexHelpers.is_ip(request.answer):
                            vt_url = "https://virustotal.com/en/ip-address/" + request.answer + "/information/"
                        else:
                            vt_url = "https://virustotal.com/en/domain/" + request.answer + "/information/"
                        url = self.new_tag("a", parent=td)
                        url["href"] = vt_url
                        url.string = "VT"

                    td = self.new_tag("td", parent=tr)
                    td.string = request.answer

                    td = self.new_tag("td", parent=tr)
                    td.string = request.answer_type

                # Update the sub-section.
                self.update_section(dns_div, old_section_soup=dns_section)

            #####################
            ##                 ##
            ##  HTTP REQUESTS  ##
            ##                 ##
            #####################
            # Only continue if there are actually any HTTP requests.
            if dedup_report.http_requests:
                self.logger.debug("Updating HTTP requests for " + hash)

                # Make the new sub-section.
                http_section_id = "http_" + hash
                http_section = self.make_section(http_section_id,
                                                 parent=div)

                # Create a new parent div for the sub-section.
                http_div = self.new_tag("div")

                # Add a header tag for the HTTP requests.
                header = self.new_tag("h4", parent=http_div)
                header.string = "HTTP Requests"

                # Create a new table tag.
                table = self.new_tag("table", parent=http_div)

                # Set up the table header row.
                thead = self.new_tag("thead", parent=table)
                tr = self.new_tag("tr", parent=thead)
                titles = [
                    "VirusTotal", "Method", "Host", "URI", "Port",
                    "User-Agent"
                ]
                for title in titles:
                    th = self.new_tag("th", parent=tr)
                    th.string = title

                # Set up the table body rows.
                tbody = self.new_tag("tbody", parent=table)
                for request in dedup_report.http_requests:
                    tr = self.new_tag("tr", parent=tbody)

                    # VirusTotal links URLs by the SHA256 of the full URL.
                    td = self.new_tag("td", parent=tr)
                    url = self.new_tag("a", parent=td)
                    full_url = "http://" + request.host + request.uri
                    url_hash = hashlib.sha256(
                        full_url.encode()).hexdigest()
                    vt_url = "https://virustotal.com/en/url/" + url_hash + "/analysis/"
                    url["href"] = vt_url
                    url.string = "VT"

                    td = self.new_tag("td", parent=tr)
                    td.string = request.method

                    td = self.new_tag("td", parent=tr)
                    td.string = request.host

                    td = self.new_tag("td", parent=tr)
                    td.string = request.uri

                    td = self.new_tag("td", parent=tr)
                    td.string = request.port

                    td = self.new_tag("td", parent=tr)
                    td.string = request.user_agent

                # Update the sub-section.
                self.update_section(http_div,
                                    old_section_soup=http_section)

            #######################
            ##                   ##
            ##  CONTACTED HOSTS  ##
            ##                   ##
            #######################
            # Only continue if there are actually any contacted hosts.
            if dedup_report.contacted_hosts:
                self.logger.debug("Updating contacted hosts for " + hash)

                # Make the new sub-section.
                hosts_section_id = "hosts_" + hash
                hosts_section = self.make_section(hosts_section_id,
                                                  parent=div)

                # Create a new parent div for the sub-section.
                hosts_div = self.new_tag("div")

                # Add a header tag for the contacted hosts.
                header = self.new_tag("h4", parent=hosts_div)
                header.string = "Contacted Hosts"

                # Create a new table tag.
                table = self.new_tag("table", parent=hosts_div)

                # Set up the table header row.
                thead = self.new_tag("thead", parent=table)
                tr = self.new_tag("tr", parent=thead)
                titles = [
                    "VirusTotal", "Tor Node", "Address", "Port", "Protocol",
                    "Location", "Associated Domains"
                ]
                for title in titles:
                    th = self.new_tag("th", parent=tr)
                    th.string = title

                # Set up the table body rows.
                tbody = self.new_tag("tbody", parent=table)
                for host in dedup_report.contacted_hosts:
                    tr = self.new_tag("tr", parent=tbody)

                    # VirusTotal link for the contacted IP.
                    td = self.new_tag("td", parent=tr)
                    url = self.new_tag("a", parent=td)
                    vt_url = "https://virustotal.com/en/ip-address/" + host.ipv4 + "/information/"
                    url["href"] = vt_url
                    url.string = "VT"

                    # Tor node column is only filled in when the IP matches.
                    td = self.new_tag("td", parent=tr)
                    if self.whitelister.is_tor_node(host.ipv4):
                        td.string = "True"

                    td = self.new_tag("td", parent=tr)
                    td.string = host.ipv4

                    td = self.new_tag("td", parent=tr)
                    td.string = host.port

                    td = self.new_tag("td", parent=tr)
                    td.string = host.protocol

                    td = self.new_tag("td", parent=tr)
                    td.string = host.location

                    td = self.new_tag("td", parent=tr)
                    td.string = host.associated_domains_string

                # Update the sub-section.
                self.update_section(hosts_div,
                                    old_section_soup=hosts_section)

            #####################
            ##                 ##
            ##  PROCESS TREES  ##
            ##                 ##
            #####################
            # Only continue if there are actually some process trees.
            if dedup_report.process_tree_list:
                self.logger.debug("Updating process tree for " + hash)

                # Make the new sub-section.
                process_section_id = "process_" + hash
                process_section = self.make_section(process_section_id,
                                                    parent=div)

                # Create a new parent div for the sub-section.
                process_div = self.new_tag("div")

                # Add a header tag for the process tree.
                header = self.new_tag("h4", parent=process_div)
                header.string = "Process Tree"

                # Add a pre tag to hold them.
                pre = self.new_tag("pre", parent=process_div)
                pre["style"] = "border:1px solid gray;padding:5px;"
                pre.string = ""
                for tree in dedup_report.process_tree_list:
                    pre.string += tree + "\n"

                self.update_section(process_div,
                                    old_section_soup=process_section)

    # Replace the old top-level section with the rebuilt one.
    self.update_section(div, old_section_id="sandbox_analysis")
def generate_url_indicators(url_list, whitelister=None):
    """Create Indicator objects from one URL or an iterable of URLs.

    For every string that passes RegexHelpers.is_url(), indicators are
    produced for the host (netloc), the full URL, the URI path, and the
    path plus query string (when a query is present). Indicators whose
    construction raises ValueError are silently skipped.

    Returns the list of Indicator objects.
    """
    # NOTE(review): whitelister is accepted but never consulted here —
    # kept for interface compatibility; confirm whether filtering was intended.

    # Accept a bare string by promoting it to a single-item list.
    if isinstance(url_list, str):
        url_list = [url_list]

    indicators = []
    for url in url_list:
        if not RegexHelpers.is_url(url):
            continue

        # Normalize away a single trailing slash so http://x.com/ and
        # http://x.com yield identical indicators.
        if url.endswith("/"):
            url = url[:-1]

        split = urlsplit(url)
        host = split.netloc

        # Decide whether the host portion is an IP address or a domain.
        if RegexHelpers.is_ip(host):
            host_type = "Address - ipv4-addr"
        else:
            host_type = "URI - Domain Name"

        # Host indicator, related back to the full URL.
        try:
            host_ind = Indicator(host, host_type)
            host_ind.add_tags("uri_host")
            host_ind.add_relationships(url)
            indicators.append(host_ind)
        except ValueError:
            pass

        # Full-URL indicator, related back to the host.
        try:
            url_ind = Indicator(url, "URI - URL")
            url_ind.add_relationships(host)
            indicators.append(url_ind)
        except ValueError:
            pass

        # Path indicators (skip empty or bare "/" paths).
        if split.path and split.path != "/":
            try:
                path_ind = Indicator(split.path, "URI - Path")
                path_ind.add_tags(["uri_path", host])
                path_ind.add_relationships([url, host])
                indicators.append(path_ind)
            except ValueError:
                pass

            # Path + query indicator, only when a ? query string exists.
            if split.query:
                try:
                    full_path = split.path + "?" + split.query
                    query_ind = Indicator(full_path, "URI - Path")
                    query_ind.add_tags(["uri_path", host])
                    query_ind.add_relationships([url, host])
                    indicators.append(query_ind)
                except ValueError:
                    pass

    return indicators
def sha1(self, sha1):
    """Store *sha1* as a string if it is a valid SHA1 digest, else store ""."""
    candidate = str(sha1)
    self._sha1 = candidate if RegexHelpers.is_sha1(candidate) else ""
def _parse_attachment(self, message_part, charset):
    """Extract attachment data from an email message part.

    Scans the part's header items for the word "attachment"; when found,
    decodes the payload (base64 or text/html handled specially), hashes it,
    and returns a dict with keys: data, content_type, size, md5, sha256,
    name, create_date, mod_date, read_date (plus strings_urls for base64
    payloads). Returns None if no attachment marker is present.
    """
    part_items = message_part.items()
    # Header items are (name, value) tuples; the "attachment" marker may
    # appear in either element, so both are checked.
    for tup in part_items:
        for value in tup:
            if "attachment" in value:
                file_data = message_part.get_payload()
                attachment_dict = {}
                if message_part.get("Content-Transfer-Encoding",
                                    None) == "base64":
                    # Strip newlines before decoding the base64 payload.
                    file_data_b64 = file_data.replace("\n", "")
                    # Some attachments arrive without proper base64 padding.
                    # Appending "==" fixes those and does not harm correctly
                    # padded data.
                    file_data_decoded = base64.b64decode(file_data_b64 + "==")

                    # Pull printable strings out of the decoded bytes and
                    # look for any URLs among them.
                    strings_list = RegexHelpers.find_strings(
                        file_data_decoded)
                    strings = " ".join(strings_list)
                    strings_urls = RegexHelpers.find_urls(strings)
                    attachment_dict["strings_urls"] = strings_urls
                elif message_part.get_content_type() == "text/html":
                    # Decode using the message's charset, then re-encode as
                    # UTF-8 bytes so hashing below operates on bytes.
                    file_data_decoded = message_part.get_payload(
                        decode=True).decode(charset).encode('utf-8')
                else:
                    file_data_decoded = file_data

                # Hash the decoded payload. A TypeError (e.g. payload is not
                # bytes) yields an empty hash rather than failing the parse.
                try:
                    md5_hasher = hashlib.md5()
                    md5_hasher.update(file_data_decoded)
                    md5_hash = md5_hasher.hexdigest()
                except TypeError:
                    md5_hash = ""

                try:
                    sha256_hasher = hashlib.sha256()
                    sha256_hasher.update(file_data_decoded)
                    sha256_hash = sha256_hasher.hexdigest()
                except TypeError:
                    sha256_hash = ""

                attachment_dict["data"] = file_data_decoded
                attachment_dict[
                    "content_type"] = message_part.get_content_type()
                attachment_dict["size"] = len(file_data_decoded)
                attachment_dict["md5"] = md5_hash
                attachment_dict["sha256"] = sha256_hash
                attachment_dict["name"] = ""
                attachment_dict["create_date"] = None
                attachment_dict["mod_date"] = None
                attachment_dict["read_date"] = None

                # Find the attachment name. It is usually given as
                # 'filename="..."' but sometimes appears elsewhere as just
                # 'name="..."', so a single regex matches either form in any
                # header item. group 2 holds the actual name.
                attachment_name_pattern = re.compile(
                    r'(file)?name="(.*?)"')
                for tup in part_items:
                    for item in tup:
                        attachment_name = attachment_name_pattern.search(
                            item)
                        if attachment_name:
                            attachment_dict[
                                "name"] = RegexHelpers.decode_utf_b64_string(
                                    attachment_name.groups()[1])

                # Return on the first part flagged as an attachment.
                return attachment_dict

    return None
def sha256(self, sha256):
    """Store *sha256* as a string if it is a valid SHA256 digest, else store ""."""
    candidate = str(sha256)
    self._sha256 = candidate if RegexHelpers.is_sha256(candidate) else ""