def memory_urls(self, memory_urls):
    """Store the non-whitelisted URL(s) found in memory.

    Accepts either a single URL string or any iterable of URLs; anything
    that is neither is silently ignored.
    """
    collected = []
    candidate = str(memory_urls)

    # A single string that is itself a URL is handled directly.
    if RegexHelpers.is_url(candidate):
        if not self.whitelister.is_url_whitelisted(candidate):
            collected.append(candidate)
    # Otherwise treat the input as an iterable of URL candidates.
    else:
        try:
            collected.extend(
                str(url) for url in memory_urls
                if RegexHelpers.is_url(str(url))
                and not self.whitelister.is_url_whitelisted(str(url))
            )
        except TypeError:
            # Non-iterable input: keep whatever was collected (nothing).
            pass

    self._memory_urls = collected
 def dns_requests(self, dns_requests):
     self._dns_requests = []
     
     try:
         for request in dns_requests:
             if isinstance(request, DnsRequest):
                 if not self.whitelister.is_domain_whitelisted(request.request):
                     if RegexHelpers.is_ip(request.answer):
                         if not self.whitelister.is_ip_whitelisted(request.answer):
                             self._dns_requests.append(request)
                     elif RegexHelpers.is_domain(request.answer):
                         if not self.whitelister.is_domain_whitelisted(request.answer):
                             self._dns_requests.append(request)
                     else:
                         self._dns_requests.append(request)
     except TypeError:
         pass
    def get_all_urls(self):
        """Return a sorted, de-duplicated list of every URL from this sample.

        Combines the process tree, memory, and strings URLs with URLs
        reconstructed from the recorded HTTP requests.
        """
        unique_urls = set(self.process_tree_urls)
        unique_urls.update(self.memory_urls)
        unique_urls.update(self.strings_urls)

        # Rebuild a URL for each HTTP request and keep it if it is valid.
        for req in self.http_requests:
            candidate = "http://" + req.host + req.uri
            if RegexHelpers.is_url(candidate):
                unique_urls.add(candidate)

        return sorted(unique_urls)
# Example #4
# 0
def main():
    """Extract and print all URLs found in the file given via -f/--file-path."""
    parser = argparse.ArgumentParser()
    parser.add_argument('-f',
                        '--file-path',
                        action='store',
                        dest='file_path',
                        required=True,
                        default=None,
                        help="Path of file to use for URL extraction.")
    args = parser.parse_args()

    # Read the file as raw bytes so binary content can be scanned too.
    with open(args.file_path, "rb") as b:
        contents = b.read()

    for url in RegexHelpers.find_urls(contents):
        print(url)
    def update_phish_info(self, email_list):
        """Rebuild the "Phish E-mail Information" HTML table section.

        Builds a <div> containing a header and a table with one row per
        parsed e-mail, then swaps it into the document in place of the
        existing "phish_email_information" section.
        """
        self.logger.debug("Updating Phish Information section.")

        # Parent container for the whole section.
        div = self.new_tag("div")

        # Section header.
        header = self.new_tag("h2", parent=div)
        header.string = "Phish E-mail Information"

        # Table with a header row listing the column titles.
        table = self.new_tag("table", parent=div)
        thead = self.new_tag("thead", parent=table)
        tr = self.new_tag("tr", parent=thead)
        for title in [
                "URL", "Time", "From", "To", "Subject", "Attachments",
                "MD5 Hashes", "CC", "BCC", "Reply-To", "Message ID"
        ]:
            th = self.new_tag("th", parent=tr)
            th.string = title

        # One body row per valid EmailParser object.
        tbody = self.new_tag("tbody", parent=table)
        for email in email_list:
            if not isinstance(email, EmailParser.EmailParser):
                continue
            tr = self.new_tag("tr", parent=tbody)

            # First cell: an "Alert" link, but only when the reference
            # is actually a valid URL; otherwise the cell stays empty.
            td = self.new_tag("td", parent=tr)
            if RegexHelpers.is_url(email.reference):
                link = self.new_tag("a", parent=td)
                link["href"] = email.reference
                link.string = "Alert"

            # Remaining cells, in the same order as the column titles.
            cell_values = [
                email.received_time,
                email.from_address,
                email.to_string,
                email.decoded_subject if email.decoded_subject else email.subject,
                email.attachments_string,
                email.md5_string,
                email.cc_string,
                email.bcc_string,
                email.replyto,
                email.message_id,
            ]
            for value in cell_values:
                td = self.new_tag("td", parent=tr)
                td.string = value

        self.update_section(div, old_section_id="phish_email_information")
 def sha512(self, sha512):
     """Store the value if it looks like a SHA-512 hash, else an empty string."""
     value = str(sha512)
     self._sha512 = value if RegexHelpers.is_sha512(value) else ""
    def extract_indicators(self, check_whitelist=True):
        """Build Indicator objects (IOCs) from the parsed sandbox results.

        Creates indicators for the sample's hashes, contacted hosts,
        DNS requests, dropped files, HTTP requests, memory/strings/
        process-tree URLs, and mutexes, appending them to self.iocs.

        Args:
            check_whitelist: When True, run the collected IOCs through
                the whitelists before merging duplicates.
        """
        # Make an Indicator for the sample's MD5 hash.
        if RegexHelpers.is_md5(self.md5):
            try:
                ind = Indicator.Indicator(self.md5, "Hash - MD5")
                ind.add_tags("sandboxed_sample")
                self.iocs.append(ind)
            except ValueError:
                pass

        # Make an Indicator for the sample's SHA1 hash.
        if RegexHelpers.is_sha1(self.sha1):
            try:
                ind = Indicator.Indicator(self.sha1, "Hash - SHA1")
                ind.add_tags("sandboxed_sample")
                self.iocs.append(ind)
            except ValueError:
                pass

        # Make an Indicator for the sample's SHA256 hash.
        if RegexHelpers.is_sha256(self.sha256):
            try:
                ind = Indicator.Indicator(self.sha256, "Hash - SHA256")
                ind.add_tags("sandboxed_sample")
                self.iocs.append(ind)
            except ValueError:
                pass

        # Make Indicators for any contacted hosts.
        for host in self.contacted_hosts:
            # Make an Indicator for the IP itself.
            if RegexHelpers.is_ip(host.ipv4):
                try:
                    ind = Indicator.Indicator(host.ipv4, "Address - ipv4-addr")
                    ind.add_tags("contacted_host")
                    if host.protocol and host.port:
                        ind.add_tags(host.protocol + " " + host.port)
                    elif host.protocol and not host.port:
                        # BUG FIX: this previously read
                        # "indicator.add_tag(host.protocol)", which raised
                        # NameError ("indicator" was never defined) and used
                        # a nonexistent method name. Tag the indicator we
                        # just built, using add_tags like everywhere else.
                        ind.add_tags(host.protocol)
                    self.iocs.append(ind)
                except ValueError:
                    pass

                # Make Indicators for any associated domains.
                for domain in host.associated_domains:
                    if RegexHelpers.is_domain(domain["domain"]):
                        try:
                            ind = Indicator.Indicator(domain["domain"], "URI - Domain Name")
                            ind.add_tags("associated_to_" + host.ipv4)
                            ind.add_relationships(host.ipv4)
                            self.iocs.append(ind)
                        except ValueError:
                            pass

        # Make Indicators for any DNS requests.
        for request in self.dns_requests:
            # Make an Indicator for the requested domain.
            if RegexHelpers.is_domain(request.request):
                try:
                    ind = Indicator.Indicator(request.request, "URI - Domain Name")
                    ind.add_tags("dns_request")
                    # If the DNS answer is an IP, add a tag for it and
                    # also create an Indicator for it.
                    if RegexHelpers.is_ip(request.answer):
                        ind.add_tags(request.answer)

                        try:
                            ip_ind = Indicator.Indicator(request.answer, "Address - ipv4-addr")
                            ip_ind.add_tags(["dns_response", request.request])
                            self.iocs.append(ip_ind)
                        except ValueError:
                            pass

                    self.iocs.append(ind)
                except ValueError:
                    pass

        # Make Indicators for any dropped files.
        # TODO: Add back in the ability to only make Indicators for "interesting"
        # dropped files, based on file type or file extension.
        for file in self.dropped_files:
            # Make an Indicator for the file path.
            try:
                ind = Indicator.Indicator(file.path, "Windows - FilePath")
                ind.add_tags("dropped_file")
                ind.add_relationships(file.filename)
                self.iocs.append(ind)
            except ValueError:
                pass

            # Make an Indicator for the file name.
            try:
                ind = Indicator.Indicator(file.filename, "Windows - FileName")
                ind.add_tags("dropped_file")
                ind.add_relationships([file.path, file.md5, file.sha1, file.sha256])
                self.iocs.append(ind)
            except ValueError:
                pass

            # Make an Indicator for the MD5 hash.
            if RegexHelpers.is_md5(file.md5):
                try:
                    ind = Indicator.Indicator(file.md5, "Hash - MD5")
                    ind.add_tags([file.filename, "dropped_file"])
                    ind.add_relationships([file.filename, file.path, file.sha1, file.sha256])
                    self.iocs.append(ind)
                except ValueError:
                    pass

            # Make an Indicator for the SHA1 hash.
            if RegexHelpers.is_sha1(file.sha1):
                try:
                    ind = Indicator.Indicator(file.sha1, "Hash - SHA1")
                    ind.add_tags([file.filename, "dropped_file"])
                    ind.add_relationships([file.filename, file.path, file.md5, file.sha256])
                    self.iocs.append(ind)
                except ValueError:
                    pass

            # Make an Indicator for the SHA256 hash.
            if RegexHelpers.is_sha256(file.sha256):
                try:
                    ind = Indicator.Indicator(file.sha256, "Hash - SHA256")
                    ind.add_tags([file.filename, "dropped_file"])
                    ind.add_relationships([file.filename, file.path, file.md5, file.sha1])
                    self.iocs.append(ind)
                except ValueError:
                    pass

        # Make Indicators for any HTTP requests.
        for request in self.http_requests:
            # Check if the host is a domain or IP.
            if RegexHelpers.is_ip(request.host):
                indicator_type = "Address - ipv4-addr"
            # Otherwise it must be a domain.
            else:
                indicator_type = "URI - Domain Name"

            # Make an Indicator for the host.
            try:
                ind = Indicator.Indicator(request.host, indicator_type)
                ind.add_tags(["http_request", request.method])
                if request.method == "POST":
                    ind.add_tags("c2")
                self.iocs.append(ind)
            except ValueError:
                pass

            # Make an Indicator for the URI path.
            if request.uri != "/":
                try:
                    ind = Indicator.Indicator(request.uri, "URI - Path")
                    ind.add_tags(["http_request", request.method, request.host])
                    if request.method == "POST":
                        ind.add_tags("c2")
                    ind.add_relationships(request.host)
                    self.iocs.append(ind)
                except ValueError:
                    pass

            # Make an Indicator for the full URL.
            try:
                url = "http://" + request.host + request.uri
                ind = Indicator.Indicator(url, "URI - URL")
                ind.add_tags(["http_request", request.method])
                if request.method == "POST":
                    ind.add_tags("c2")
                ind.add_relationships([request.host, request.uri])
                self.iocs.append(ind)
            except ValueError:
                pass

            # Make an Indicator for the User-Agent.
            try:
                ind = Indicator.Indicator(request.user_agent, "URI - HTTP - UserAgent")
                ind.add_tags(["http_request", request.method, request.host])
                if request.method == "POST":
                    ind.add_tags("c2")
                ind.add_relationships([request.host, request.uri])
                self.iocs.append(ind)
            except ValueError:
                pass

        # Make Indicators for any memory URLs. Currently, only VxStream
        # has this memory URL feature.
        indicator_list = Indicator.generate_url_indicators(self.memory_urls)

        # Add some extra tags to the generated indicators and
        # then add them to our main IOC list.
        for ind in indicator_list:
            ind.add_tags("url_in_memory")
            self.iocs.append(ind)

        # Make Indicators for any URLs found in the sample's strings.
        indicator_list = Indicator.generate_url_indicators(self.strings_urls)

        # Add some extra tags to the generated indicators and
        # then add them to our main IOC list.
        for ind in indicator_list:
            ind.add_tags("url_in_strings")
            self.iocs.append(ind)

        # Make Indicators for any URLs found in the sample's process tree.
        indicator_list = Indicator.generate_url_indicators(self.process_tree_urls)

        # Add some extra tags to the generated indicators and
        # then add them to our main IOC list.
        for ind in indicator_list:
            ind.add_tags("url_in_process_tree")
            self.iocs.append(ind)

        # Make Indicators for any mutexes.
        for mutex in self.mutexes:
            try:
                ind = Indicator.Indicator(mutex, "Windows - Mutex")
                ind.add_tags("mutex_created")
                self.iocs.append(ind)
            except ValueError:
                pass

        # Run the IOCs through the whitelists if requested.
        if check_whitelist:
            self.iocs = Indicator.run_whitelist(self.iocs)

        # Finally merge the IOCs so we don't have any duplicates.
        self.iocs = Indicator.merge_duplicate_indicators(self.iocs)
    def __init__(self,
                 smtp_path=None,
                 smtp_text=None,
                 attached_email=True,
                 check_whitelist=True):
        """Parse an SMTP stream / e-mail and extract its parts and IOCs.

        Args:
            smtp_path: Path to an "smtp.stream"/"smtp.email" file. Takes
                precedence over smtp_text when both are given.
            smtp_text: The raw SMTP/e-mail text.
            attached_email: When True, try to parse an embedded
                message/rfc822 part instead of the outer journaling wrapper.
            check_whitelist: When True, run the collected IOCs through the
                whitelists before merging duplicates.

        Raises:
            ValueError: If neither smtp_path nor smtp_text is given.
        """
        # Check that we got at least an SMTP path or text:
        if not smtp_path and not smtp_text:
            raise ValueError(
                "You must specify either an SMTP path or the SMTP text.")

        # In case we received both, default to use the smtp_path over the smtp_text.
        if smtp_path:
            # Read the SMTP file. This works with the "smtp.stream" file or in theory
            # an "smtp.email" type file with the SMTP commands removed.
            # NOTE(review): if the path does not exist, self.path is never
            # set and the open() below raises AttributeError — confirm
            # whether callers rely on that or a FileNotFoundError is wanted.
            if os.path.exists(smtp_path):
                self.path = smtp_path
                self.name = os.path.basename(smtp_path)

            with open(self.path) as s:
                smtp_stream = s.read().splitlines()
        else:
            smtp_stream = smtp_text.splitlines()
            # path and name are here just for completeness for anything
            # external that might rely on them.
            self.path = ""
            self.name = hashlib.md5(smtp_text.encode('utf-8')).hexdigest()

        # A place to store the IOCs.
        self.iocs = []

        # Where did this alert come from? This could be anything, such as
        # a URL to an ACE alert or whatever other reference you want.
        self.reference = ""

        # Find the envelope from/to addresses. This will only work if given an
        # "smtp.stream" file, since otherwise the SMTP commands will not exist.
        self.envelope_from = ""
        self.envelope_to = ""
        envelope_address_pattern = re.compile(r'.*<(.*)>.*')
        for line in smtp_stream:
            if line.startswith("MAIL FROM:"):
                try:
                    # Make an Indicator for the address.
                    self.envelope_from = envelope_address_pattern.match(
                        line).group(1)
                    try:
                        ind = Indicator.Indicator(self.envelope_from,
                                                  "Email - Address")
                        ind.add_tags(["phish", "envelope_from_address"])
                        self.iocs.append(ind)
                    except ValueError:
                        pass
                except AttributeError:
                    self.envelope_from = ""
            if line.startswith("RCPT TO:"):
                try:
                    # Make an Indicator for the address.
                    self.envelope_to = envelope_address_pattern.match(
                        line).group(1)
                    try:
                        # BUG FIX: previously this built the indicator from
                        # self.envelope_from (copy/paste error) even though
                        # it is tagged "envelope_to_address".
                        ind = Indicator.Indicator(self.envelope_to,
                                                  "Email - Address")
                        ind.add_tags(["phish", "envelope_to_address"])
                        self.iocs.append(ind)
                    except ValueError:
                        pass
                except AttributeError:
                    self.envelope_to = ""

        # Exchange journaling sends us the e-mail embedded as an attachment within
        # another e-mail. We need to strip away those outer headers so that we parse
        # the attached e-mail that we actually care about.
        #if attached_email:
        #    if "Content-Type: message/rfc822" in smtp_stream:
        #        index = smtp_stream.index("Content-Type: message/rfc822")
        #        smtp_stream = smtp_stream[index:]

        # Just in case we are dealing with an "smtp.stream" file that still has
        # the SMTP commands above the actual e-mail, we need to strip those out.
        # This will remove all lines prior to the Received: headers so that the
        # email.parser can properly parse out the e-mail. If we were given an
        # "smtp.email" type of file with the SMTP commands already removed, this
        # should not affect anything.
        # ROBUSTNESS FIX: guard against a stream with no "Received:" header,
        # which previously raised IndexError after popping every line.
        while smtp_stream and not smtp_stream[0].startswith("Received:"):
            smtp_stream.pop(0)

        # Join the header lines into a single string.
        email_text = "\n".join(smtp_stream)

        # Create the e-mail object.
        self._email_obj = email.message_from_string(email_text)

        # If we want to try and parse an embedded/attached e-mail instead...
        if attached_email:
            # Walk the full e-mail's parts.
            for part in self._email_obj.walk():
                # Continue if the part looks like a valid e-mail.
                if part.get_content_type() == "message/rfc822":
                    # Split the part lines into a list.
                    part_text = str(part).splitlines()

                    # Make sure our part starts with the Received: headers.
                    # ROBUSTNESS FIX: same IndexError guard as above.
                    while part_text and not part_text[0].startswith("Received:"):
                        part_text.pop(0)
                    part_text = "\n".join(part_text)

                    # Make the new e-mail object.
                    self._email_obj = email.message_from_string(part_text)

        # Parse the e-mail object for its content.
        parsed_email = self._parse_content()

        # Now that we have the e-mail object, parse out some of the interesting parts.
        self.urls = set()
        self.headers = self._get_all_headers_string()

        # Make Indicators for the received headers (SMTP relays)
        self.received = self.get_header("received")
        for hop in self.received:
            for ip in RegexHelpers.find_ip_addresses(hop):
                try:
                    ind = Indicator.Indicator(ip, "Address - ipv4-addr")
                    ind.add_tags(["phish", "smtp_relay"])
                    # We consider SMTP relay indicators benign... Don't want to alert every time
                    # we see the relay sending an e-mail, but it's nice to know for correlation.
                    ind.make_benign()
                    self.iocs.append(ind)
                except ValueError:
                    pass

            for domain in RegexHelpers.find_domains(hop):
                if isinstance(domain, tuple):
                    try:
                        ind = Indicator.Indicator(domain[0],
                                                  "URI - Domain Name")
                        ind.add_tags(["phish", "smtp_relay"])
                        ind.make_benign()
                        self.iocs.append(ind)
                    except ValueError:
                        pass

        # Get the e-mail's plaintext body, HTML body, and the visible text from the HTML.
        self.body = parsed_email["body"]
        self.html = parsed_email["html"]
        if self.html:
            soup = BeautifulSoup(self.html, "html.parser")
            self.visible_html = "".join(soup.findAll(text=True))
        else:
            self.visible_html = ""

        # Get any e-mail attachments.
        self.attachments = parsed_email["attachments"]
        self.attachments_string = ", ".join(
            [attach["name"] for attach in self.attachments])
        self.md5_string = ", ".join(
            [attach["md5"] for attach in self.attachments])

        # Make an Indicator for the from address.
        try:
            self.from_address = self._get_address_list("from")[0][1]
            try:
                ind = Indicator.Indicator(self.from_address, "Email - Address")
                ind.add_tags(["phish", "from_address"])
                self.iocs.append(ind)
            except ValueError:
                pass
        except IndexError:
            self.from_address = ""

        # Make an Indicator for the reply-to address.
        try:
            self.replyto = self._get_address_list("reply-to")[0][1]
            try:
                ind = Indicator.Indicator(self.replyto, "Email - Address")
                ind.add_tags(["phish", "replyto_address"])
                if self.from_address:
                    ind.add_relationships(self.from_address)
                self.iocs.append(ind)
            except ValueError:
                pass
        except IndexError:
            self.replyto = ""

        # Make an Indicator for the subject.
        try:
            self.subject = "".join(self.get_header("subject")[0].splitlines())
            try:
                ind = Indicator.Indicator(self.subject, "Email - Subject")
                ind.add_tags(["phish", "subject"])
                if self.from_address:
                    ind.add_relationships(self.from_address)
                self.iocs.append(ind)
            except ValueError:
                pass
        except IndexError:
            self.subject = ""

        # Try and decode the subject and make an Indicator.
        try:
            self.decoded_subject = "".join(
                str(make_header(decode_header(
                    self.get_header("subject")[0]))).splitlines())
            try:
                ind = Indicator.Indicator(self.decoded_subject,
                                          "Email - Subject")
                ind.add_tags(["phish", "decoded_subject"])
                if self.from_address:
                    ind.add_relationships(self.from_address)
                self.iocs.append(ind)
            except ValueError:
                pass
        except IndexError:
            self.decoded_subject = ""

        # Make an Indicator for each to address.
        self.to_list = [x[1] for x in self._get_address_list("to")]
        self.to_string = ", ".join(self.to_list).replace("\t", " ")
        for address in self.to_list:
            try:
                ind = Indicator.Indicator(address, "Email - Address")
                ind.add_tags(["phish", "to_address"])
                if self.from_address:
                    ind.add_relationships(self.from_address)
                self.iocs.append(ind)
            except ValueError:
                pass

        # Make an Indicator for each CC address.
        self.cc_list = [x[1] for x in self._get_address_list("cc")]
        self.cc_string = ", ".join(self.cc_list).replace("\t", " ")
        for address in self.cc_list:
            try:
                ind = Indicator.Indicator(address, "Email - Address")
                ind.add_tags(["phish", "cc_address"])
                if self.from_address:
                    ind.add_relationships(self.from_address)
                self.iocs.append(ind)
            except ValueError:
                pass

        # Make an Indicator for each BCC address.
        self.bcc_list = [x[1] for x in self._get_address_list("bcc")]
        self.bcc_string = ", ".join(self.bcc_list).replace("\t", " ")
        for address in self.bcc_list:
            try:
                ind = Indicator.Indicator(address, "Email - Address")
                ind.add_tags(["phish", "bcc_address"])
                if self.from_address:
                    ind.add_relationships(self.from_address)
                self.iocs.append(ind)
            except ValueError:
                pass

        # Make an Indicator for the message ID.
        try:
            self.message_id = self.get_header("message-id")[0]
            try:
                ind = Indicator.Indicator(self.message_id, "Email Message ID")
                ind.add_tags(["phish", "message_id"])
                if self.from_address:
                    ind.add_relationships(self.from_address)
                self.iocs.append(ind)
            except ValueError:
                pass
        except IndexError:
            self.message_id = ""

        # Make an Indicator for the x-mailer.
        try:
            self.x_mailer = self.get_header("x-mailer")[0]
            try:
                ind = Indicator.Indicator(self.x_mailer, "Email - Xmailer")
                ind.add_tags(["phish", "x-mailer"])
                if self.from_address:
                    ind.add_relationships(self.from_address)
                self.iocs.append(ind)
            except ValueError:
                pass
        except IndexError:
            self.x_mailer = ""

        # Make an Indicator for the x-original-sender.
        try:
            self.x_original_sender = self.get_header("x-original-sender")[0]
            try:
                # BUG FIX: previously this used the stale loop variable
                # "address" (last BCC/CC/To address, or NameError when those
                # lists were empty) instead of the x-original-sender value.
                ind = Indicator.Indicator(self.x_original_sender,
                                          "Email - Address")
                ind.add_tags(["phish", "x-original-sender"])
                if self.from_address:
                    ind.add_relationships(self.from_address)
                self.iocs.append(ind)
            except ValueError:
                pass
        except IndexError:
            self.x_original_sender = ""

        # Make an Indicator for the x-originating-ip.
        try:
            x_originating_ip = self.get_header("x-originating-ip")[0]
            # Sometimes this field is in the form: [1.1.1.1]
            # Make sure we remove any non-IP characters.
            ip = RegexHelpers.find_ip_addresses(x_originating_ip)
            if ip:
                self.x_originating_ip = ip[0]
                try:
                    ind = Indicator.Indicator(self.x_originating_ip,
                                              "Address - ipv4-addr")
                    ind.add_tags(["phish", "x-originating-ip"])
                    if self.from_address:
                        ind.add_relationships(self.from_address)
                    self.iocs.append(ind)
                except ValueError:
                    pass
        except IndexError:
            self.x_originating_ip = ""

        # Make an Indicator for the x-sender-ip.
        try:
            x_sender_ip = self.get_header("x-sender-ip")[0]
            # Make sure like the X-Originating-IP that we only
            # get the IP address and no other characters.
            ip = RegexHelpers.find_ip_addresses(x_sender_ip)
            if ip:
                self.x_sender_ip = ip[0]
                try:
                    ind = Indicator.Indicator(self.x_sender_ip,
                                              "Address - ipv4-addr")
                    ind.add_tags(["phish", "x-sender-ip"])
                    if self.from_address:
                        ind.add_relationships(self.from_address)
                    self.iocs.append(ind)
                except ValueError:
                    pass
        except IndexError:
            self.x_sender_ip = ""

        # Make Indicators for any URLs in the plaintext body.
        # Indicator.generate_url_indicators() catches its own exceptions.
        text_urls = RegexHelpers.find_urls(self.body)
        text_urls_indicators = Indicator.generate_url_indicators(text_urls)
        for ind in text_urls_indicators:
            ind.add_tags(["phish", "plaintext_body"])
            if self.from_address:
                ind.add_relationships(self.from_address)
            self.iocs.append(ind)
        # DEAD CODE FIX: a stray "RegexHelpers.find_urls(self.html)" call
        # whose result was discarded used to sit here; removed.

        # Make Indicators for any URLs in the HTML body.
        html_urls = RegexHelpers.find_urls(self.html)
        html_urls_indicators = Indicator.generate_url_indicators(html_urls)
        for ind in html_urls_indicators:
            ind.add_tags(["phish", "html_body"])
            if self.from_address:
                ind.add_relationships(self.from_address)
            self.iocs.append(ind)

        # Make Indicators for any URLs in the visible text HTML body.
        visible_html_urls = RegexHelpers.find_urls(self.visible_html)
        visible_html_urls_indicators = Indicator.generate_url_indicators(
            visible_html_urls)
        for ind in visible_html_urls_indicators:
            ind.add_tags(["phish", "visible_html_body"])
            if self.from_address:
                ind.add_relationships(self.from_address)
            self.iocs.append(ind)

        # Make Indicators for different attachment attributes.
        all_urls = text_urls + html_urls + visible_html_urls
        for file in self.attachments:
            # Make Indicators for any strings URLs.
            if "strings_urls" in file:
                attachment_strings_urls_indicators = Indicator.generate_url_indicators(
                    file["strings_urls"])
                for ind in attachment_strings_urls_indicators:
                    ind.add_tags(["phish", "strings_url", file["name"]])
                    if self.from_address:
                        ind.add_relationships(
                            [self.from_address, file["name"]])
                    self.iocs.append(ind)
                all_urls += file["strings_urls"]

            # Make an Indicator for the filename.
            if file["name"]:
                try:
                    ind = Indicator.Indicator(file["name"],
                                              "Windows - FileName")
                    ind.add_tags(["phish", "attachment"])
                    if self.from_address:
                        ind.add_relationships(self.from_address)
                    if file["md5"]:
                        ind.add_relationships(file["md5"])
                    if file["sha256"]:
                        ind.add_relationships(file["sha256"])
                    self.iocs.append(ind)
                except ValueError:
                    pass

            # Make an Indicator for the MD5 hash.
            if file["md5"]:
                try:
                    ind = Indicator.Indicator(file["md5"], "Hash - MD5")
                    ind.add_tags(["phish", "attachment"])
                    if self.from_address:
                        ind.add_relationships(self.from_address)
                    if file["name"]:
                        ind.add_tags(file["name"])
                        ind.add_relationships(file["name"])
                    if file["sha256"]:
                        ind.add_relationships(file["sha256"])
                    self.iocs.append(ind)
                except ValueError:
                    pass

            # Make an Indicator for the SHA256 hash.
            if file["sha256"]:
                try:
                    ind = Indicator.Indicator(file["sha256"], "Hash - SHA256")
                    ind.add_tags(["phish", "attachment"])
                    if self.from_address:
                        ind.add_relationships(self.from_address)
                    if file["name"]:
                        ind.add_tags(file["name"])
                        ind.add_relationships(file["name"])
                    if file["md5"]:
                        ind.add_relationships(file["md5"])
                    self.iocs.append(ind)
                except ValueError:
                    pass

        # Parse the URLs and prevent "duplicate" URLs
        # like http://blah.com/ and http://blah.com
        for url in all_urls:
            # Strip off the ending slash if it's there.
            if url.endswith("/"):
                url = url[:-1]

            self.urls.add(url)

        self.received_time = self._get_received_time()

        # Run the IOCs through the whitelists if requested.
        if check_whitelist:
            self.iocs = Indicator.run_whitelist(self.iocs)

        # Finally merge the IOCs so we don't have any duplicates.
        self.iocs = Indicator.merge_duplicate_indicators(self.iocs)
 def md5(self, md5):
     """Validate and store an MD5 hash; store an empty string when invalid."""
     candidate = str(md5)
     self._md5 = candidate if RegexHelpers.is_md5(candidate) else ""
 def ipv4(self, ip):
     """Store *ip* when it looks like a valid IPv4 address, else clear it."""
     self.__ipv4 = ip if RegexHelpers.is_ip(ip) else ""
 def sha512(self, hash):
     """Store *hash* when it is a valid SHA-512 digest, else clear it."""
     # NOTE: the parameter shadows the builtin ``hash``; the name is kept
     # unchanged for signature compatibility.
     self.__sha512 = hash if RegexHelpers.is_sha512(hash) else ""
 def sha256(self, hash):
     """Store *hash* when it is a valid SHA-256 digest, else clear it."""
     # NOTE: the parameter shadows the builtin ``hash``; the name is kept
     # unchanged for signature compatibility.
     self.__sha256 = hash if RegexHelpers.is_sha256(hash) else ""
 def sha1(self, hash):
     """Store *hash* when it is a valid SHA-1 digest, else clear it."""
     # NOTE: the parameter shadows the builtin ``hash``; the name is kept
     # unchanged for signature compatibility.
     self.__sha1 = hash if RegexHelpers.is_sha1(hash) else ""
 def md5(self, hash):
     """Store *hash* when it is a valid MD5 digest, else clear it."""
     # NOTE: the parameter shadows the builtin ``hash``; the name is kept
     # unchanged for signature compatibility.
     self.__md5 = hash if RegexHelpers.is_md5(hash) else ""
    def update_sandbox_analysis(self, sandbox_dict):
        """Rebuild the "Sandbox Analysis" section of the wiki page.

        ``sandbox_dict`` maps a sample's hash string to a list of sandbox
        report objects for that sample (presumably one report per
        sandbox/VM run -- TODO confirm against the caller).  For each
        sample the reports are deduped and sub-sections are rendered for:
        sandbox URLs, screenshots, mutexes, dropped files, DNS requests,
        HTTP requests, contacted hosts, and process trees.  A sub-section
        is only rendered when the deduped report actually has data for it.
        The assembled HTML replaces the existing "sandbox_analysis"
        section of the page.
        """
        self.logger.debug("Updating Sandbox Analysis section.")

        # Get a working copy of the sandbox analysis section.
        #sandbox_analysis = self.get_section("sandbox_analysis")

        # Create the parent div tag.
        div = self.new_tag("div")

        # Continue if we were given a sandbox dictionary.
        if sandbox_dict:
            # Add the header tag.
            header = self.new_tag("h2", parent=div)
            header.string = "Sandbox Analysis"

            # NOTE: "hash" shadows the builtin of the same name; it is only
            # used here as the sample's hash string (dictionary key).
            for hash in sandbox_dict:
                # Get a single deduped version of the reports.
                dedup_report = BaseSandboxParser.dedup_reports(
                    sandbox_dict[hash])

                # Add a header for the sample's filename.
                header = self.new_tag("h3", parent=div)
                header.string = dedup_report.filename

                ####################
                ##                ##
                ##  SANDBOX URLS  ##
                ##                ##
                ####################
                self.logger.debug("Updating sandbox URLs for " + hash)

                # Make the new sub-section.
                sandbox_urls_section_id = "sandbox_urls_" + hash
                sandbox_urls_section = self.make_section(
                    sandbox_urls_section_id, parent=div)

                # Create a new parent div for the sub-section.
                sandbox_urls_div = self.new_tag("div")

                # Add a header tag for the URLs.
                header = self.new_tag("h4", parent=sandbox_urls_div)
                header.string = "Sandbox URLs"

                # Add an unordered list for the reports.
                ul = self.new_tag("ul", parent=sandbox_urls_div)

                # Add list items for each report.
                for report in sandbox_dict[hash]:
                    li = self.new_tag("li", parent=ul)
                    li.string = report.sandbox_display_name + " = "
                    link = self.new_tag("a", parent=li)
                    link["href"] = report.sandbox_url
                    link.string = report.filename

                # Link to the sample's VirusTotal page when we have a SHA256.
                if sandbox_dict[hash][0].sha256:
                    li = self.new_tag("li", parent=ul)
                    link = self.new_tag("a", parent=li)
                    link[
                        "href"] = "https://virustotal.com/en/file/" + sandbox_dict[
                            hash][0].sha256 + "/analysis/"
                    link.string = "VirusTotal"

                # Update the sub-section.
                self.update_section(sandbox_urls_div,
                                    old_section_soup=sandbox_urls_section)

                ###################
                ##               ##
                ##  SCREENSHOTS  ##
                ##               ##
                ###################
                # Only continue if there are actually some screenshots.
                if any(report.screenshot_path
                       for report in sandbox_dict[hash]):
                    self.logger.debug("Updating screenshots for " + hash)

                    # Make the new sub-section.
                    screenshot_section_id = "screenshot_" + hash
                    screenshot_section = self.make_section(
                        screenshot_section_id, parent=div)

                    # Create a new parent div for the sub-section.
                    screenshots_div = self.new_tag("div")

                    # Add a header tag for the screenshots.
                    header = self.new_tag("h4", parent=screenshots_div)
                    header.string = "Screenshots"

                    for report in sandbox_dict[hash]:
                        if report.screenshot_path:
                            screenshot_name = os.path.basename(
                                report.screenshot_path)

                            # Upload the screenshot as an attachment if it doesn't already exist.
                            if not self.attachment_exists(screenshot_name):
                                self.attach_file(report.screenshot_path)

                            # If the screenshot attachment exists, add an img tag for it.
                            if self.attachment_exists(screenshot_name):
                                title_p = self.new_tag("p",
                                                       parent=screenshots_div)
                                title_p[
                                    "style"] = "color:#009000; font-weight:bold;"
                                title_p.string = report.sandbox_display_name + " - " + report.sandbox_vm_name

                                img_p = self.new_tag("p",
                                                     parent=screenshots_div)
                                img = self.new_tag("img", parent=img_p)
                                img["width"] = "1000"
                                img["height"] = "562"
                                src = "/download/attachments/" + str(
                                    self.get_page_id()
                                ) + "/" + screenshot_name + "?effects=border-simple,blur-border,tape"
                                img["src"] = src

                    self.update_section(screenshots_div,
                                        old_section_soup=screenshot_section)

                ###############
                ##           ##
                ##  MUTEXES  ##
                ##           ##
                ###############
                # Only continue if there are actually some mutexes.
                if dedup_report.mutexes:
                    self.logger.debug("Updating mutexes for " + hash)

                    # Make the new sub-section.
                    mutexes_section_id = "mutexes_" + hash
                    mutex_section = self.make_section(mutexes_section_id,
                                                      parent=div)

                    # Create a new parent div for the sub-section.
                    mutexes_div = self.new_tag("div")

                    # Add a header tag for the mutexes.
                    header = self.new_tag("h4", parent=mutexes_div)
                    header.string = "Mutexes"

                    # Add a pre tag to hold them.
                    pre = self.new_tag("pre", parent=mutexes_div)
                    pre["style"] = "border:1px solid gray;padding:5px;"
                    pre.string = ""

                    for mutex in dedup_report.mutexes:
                        pre.string += mutex + "\n"

                    self.update_section(mutexes_div,
                                        old_section_soup=mutex_section)

                #####################
                ##                 ##
                ##  DROPPED FILES  ##
                ##                 ##
                #####################
                # Only continue if there are actually any dropped files.
                if dedup_report.dropped_files:
                    self.logger.debug("Updating dropped files for " + hash)

                    # Make the new sub-section.
                    dropped_section_id = "dropped_" + hash
                    dropped_section = self.make_section(dropped_section_id,
                                                        parent=div)

                    # Create a new parent div for the sub-section.
                    dropped_div = self.new_tag("div")

                    # Add a header tag for the dropped files.
                    header = self.new_tag("h4", parent=dropped_div)
                    header.string = "Dropped Files"

                    # Create a new table tag.
                    table = self.new_tag("table", parent=dropped_div)

                    # Set up the table header row.
                    thead = self.new_tag("thead", parent=table)
                    tr = self.new_tag("tr", parent=thead)
                    titles = [
                        "VirusTotal", "Filename", "Path", "Size", "Type",
                        "MD5", "SHA256"
                    ]
                    for title in titles:
                        th = self.new_tag("th", parent=tr)
                        th.string = title

                    # Set up the table body rows.
                    tbody = self.new_tag("tbody", parent=table)
                    for file in dedup_report.dropped_files:
                        tr = self.new_tag("tr", parent=tbody)

                        td = self.new_tag("td", parent=tr)
                        if file.sha256:
                            url = self.new_tag("a", parent=td)
                            vt_url = "https://virustotal.com/en/file/" + file.sha256 + "/analysis/"
                            url["href"] = vt_url
                            url.string = "VT"

                        td = self.new_tag("td", parent=tr)
                        td.string = file.filename

                        td = self.new_tag("td", parent=tr)
                        td.string = file.path

                        td = self.new_tag("td", parent=tr)
                        td.string = file.size

                        td = self.new_tag("td", parent=tr)
                        td.string = file.type

                        td = self.new_tag("td", parent=tr)
                        td.string = file.md5

                        td = self.new_tag("td", parent=tr)
                        td.string = file.sha256

                    # Update the sub-section.
                    self.update_section(dropped_div,
                                        old_section_soup=dropped_section)

                ####################
                ##                ##
                ##  DNS REQUESTS  ##
                ##                ##
                ####################
                # Only continue if there are actually any DNS requests.
                if dedup_report.dns_requests:
                    self.logger.debug("Updating DNS requests for " + hash)

                    # Make the new sub-section.
                    dns_section_id = "dns_" + hash
                    dns_section = self.make_section(dns_section_id, parent=div)

                    # Create a new parent div for the sub-section.
                    dns_div = self.new_tag("div")

                    # Add a header tag for the DNS requests.
                    header = self.new_tag("h4", parent=dns_div)
                    header.string = "DNS Requests"

                    # Create a new table tag.
                    table = self.new_tag("table", parent=dns_div)

                    # Set up the table header row.
                    thead = self.new_tag("thead", parent=table)
                    tr = self.new_tag("tr", parent=thead)
                    titles = [
                        "VirusTotal", "Request", "Type", "VirusTotal",
                        "Answer", "Answer Type"
                    ]
                    for title in titles:
                        th = self.new_tag("th", parent=tr)
                        th.string = title

                    # Set up the table body rows.
                    tbody = self.new_tag("tbody", parent=table)
                    for request in dedup_report.dns_requests:
                        tr = self.new_tag("tr", parent=tbody)

                        td = self.new_tag("td", parent=tr)
                        url = self.new_tag("a", parent=td)
                        vt_url = "https://virustotal.com/en/domain/" + request.request + "/information/"
                        url["href"] = vt_url
                        url.string = "VT"

                        td = self.new_tag("td", parent=tr)
                        td.string = request.request

                        td = self.new_tag("td", parent=tr)
                        td.string = request.type

                        # Pick the right VirusTotal link type for the answer
                        # (IP address vs. domain).
                        td = self.new_tag("td", parent=tr)
                        if request.answer:
                            if RegexHelpers.is_ip(request.answer):
                                vt_url = "https://virustotal.com/en/ip-address/" + request.answer + "/information/"
                            else:
                                vt_url = "https://virustotal.com/en/domain/" + request.answer + "/information/"

                            url = self.new_tag("a", parent=td)
                            url["href"] = vt_url
                            url.string = "VT"

                        td = self.new_tag("td", parent=tr)
                        td.string = request.answer

                        td = self.new_tag("td", parent=tr)
                        td.string = request.answer_type

                    # Update the sub-section.
                    self.update_section(dns_div, old_section_soup=dns_section)

                #####################
                ##                 ##
                ##  HTTP REQUESTS  ##
                ##                 ##
                #####################
                # Only continue if there are actually any HTTP requests.
                if dedup_report.http_requests:
                    self.logger.debug("Updating HTTP requests for " + hash)

                    # Make the new sub-section.
                    http_section_id = "http_" + hash
                    http_section = self.make_section(http_section_id,
                                                     parent=div)

                    # Create a new parent div for the sub-section.
                    http_div = self.new_tag("div")

                    # Add a header tag for the HTTP requests.
                    header = self.new_tag("h4", parent=http_div)
                    header.string = "HTTP Requests"

                    # Create a new table tag.
                    table = self.new_tag("table", parent=http_div)

                    # Set up the table header row.
                    thead = self.new_tag("thead", parent=table)
                    tr = self.new_tag("tr", parent=thead)
                    titles = [
                        "VirusTotal", "Method", "Host", "URI", "Port",
                        "User-Agent"
                    ]
                    for title in titles:
                        th = self.new_tag("th", parent=tr)
                        th.string = title

                    # Set up the table body rows.
                    tbody = self.new_tag("tbody", parent=table)
                    for request in dedup_report.http_requests:
                        tr = self.new_tag("tr", parent=tbody)

                        # VirusTotal URL reports are keyed by the SHA256 of
                        # the full URL string.
                        td = self.new_tag("td", parent=tr)
                        url = self.new_tag("a", parent=td)
                        full_url = "http://" + request.host + request.uri
                        url_hash = hashlib.sha256(
                            full_url.encode()).hexdigest()
                        vt_url = "https://virustotal.com/en/url/" + url_hash + "/analysis/"
                        url["href"] = vt_url
                        url.string = "VT"

                        td = self.new_tag("td", parent=tr)
                        td.string = request.method

                        td = self.new_tag("td", parent=tr)
                        td.string = request.host

                        td = self.new_tag("td", parent=tr)
                        td.string = request.uri

                        td = self.new_tag("td", parent=tr)
                        td.string = request.port

                        td = self.new_tag("td", parent=tr)
                        td.string = request.user_agent

                    # Update the sub-section.
                    self.update_section(http_div,
                                        old_section_soup=http_section)

                #######################
                ##                   ##
                ##  CONTACTED HOSTS  ##
                ##                   ##
                #######################
                # Only continue if there are actually any contacted hosts.
                if dedup_report.contacted_hosts:
                    self.logger.debug("Updating contacted hosts for " + hash)

                    # Make the new sub-section.
                    hosts_section_id = "hosts_" + hash
                    hosts_section = self.make_section(hosts_section_id,
                                                      parent=div)

                    # Create a new parent div for the sub-section.
                    hosts_div = self.new_tag("div")

                    # Add a header tag for the contacted hosts.
                    header = self.new_tag("h4", parent=hosts_div)
                    header.string = "Contacted Hosts"

                    # Create a new table tag.
                    table = self.new_tag("table", parent=hosts_div)

                    # Set up the table header row.
                    thead = self.new_tag("thead", parent=table)
                    tr = self.new_tag("tr", parent=thead)
                    titles = [
                        "VirusTotal", "Tor Node", "Address", "Port",
                        "Protocol", "Location", "Associated Domains"
                    ]
                    for title in titles:
                        th = self.new_tag("th", parent=tr)
                        th.string = title

                    # Set up the table body rows.
                    tbody = self.new_tag("tbody", parent=table)
                    for host in dedup_report.contacted_hosts:
                        tr = self.new_tag("tr", parent=tbody)

                        td = self.new_tag("td", parent=tr)
                        url = self.new_tag("a", parent=td)
                        vt_url = "https://virustotal.com/en/ip-address/" + host.ipv4 + "/information/"
                        url["href"] = vt_url
                        url.string = "VT"

                        # Flag known Tor exit nodes.
                        td = self.new_tag("td", parent=tr)
                        if self.whitelister.is_tor_node(host.ipv4):
                            td.string = "True"

                        td = self.new_tag("td", parent=tr)
                        td.string = host.ipv4

                        td = self.new_tag("td", parent=tr)
                        td.string = host.port

                        td = self.new_tag("td", parent=tr)
                        td.string = host.protocol

                        td = self.new_tag("td", parent=tr)
                        td.string = host.location

                        td = self.new_tag("td", parent=tr)
                        td.string = host.associated_domains_string

                    # Update the sub-section.
                    self.update_section(hosts_div,
                                        old_section_soup=hosts_section)

                #####################
                ##                 ##
                ##  PROCESS TREES  ##
                ##                 ##
                #####################
                # Only continue if there are actually some process trees.
                if dedup_report.process_tree_list:
                    self.logger.debug("Updating process tree for " + hash)

                    # Make the new sub-section.
                    process_section_id = "process_" + hash
                    process_section = self.make_section(process_section_id,
                                                        parent=div)

                    # Create a new parent div for the sub-section.
                    process_div = self.new_tag("div")

                    # Add a header tag for the process tree.
                    header = self.new_tag("h4", parent=process_div)
                    header.string = "Process Tree"

                    # Add a pre tag to hold them.
                    pre = self.new_tag("pre", parent=process_div)
                    pre["style"] = "border:1px solid gray;padding:5px;"
                    pre.string = ""

                    for tree in dedup_report.process_tree_list:
                        pre.string += tree + "\n"

                    self.update_section(process_div,
                                        old_section_soup=process_section)

        # Replace the old page section with the freshly built div.
        self.update_section(div, old_section_id="sandbox_analysis")
# Example #16
# 0
def generate_url_indicators(url_list, whitelister=None):
    """Build Indicator objects for every valid URL in *url_list*.

    For each URL, indicators are created for the URI host (IP address or
    domain), the full URL, the URI path, and the path plus its query
    string.  ``url_list`` may be a single URL string or any iterable of
    URLs.  The ``whitelister`` parameter is currently unused and is kept
    only for interface compatibility.
    """
    indicators = []

    # Normalize a bare string (a single URL) into a one-element list so
    # the loop below has consistent input.
    if isinstance(url_list, str):
        url_list = [url_list]

    # Parse the URLs so that we can create Indicators and also prevent
    # "duplicate" URLs like http://blah.com/ and http://blah.com
    for url in url_list:
        if not RegexHelpers.is_url(url):
            continue

        # Strip off the ending slash if it's there.
        if url.endswith("/"):
            url = url[:-1]

        parsed_url = urlsplit(url)

        # Classify the netloc: IP address, otherwise assume domain name.
        if RegexHelpers.is_ip(parsed_url.netloc):
            netloc_type = "Address - ipv4-addr"
        else:
            netloc_type = "URI - Domain Name"

        # Indicator for the URI host.
        try:
            host_ind = Indicator(parsed_url.netloc, netloc_type)
            host_ind.add_tags("uri_host")
            host_ind.add_relationships(url)
            indicators.append(host_ind)
        except ValueError:
            pass

        # Indicator for the full URL.
        try:
            full_ind = Indicator(url, "URI - URL")
            full_ind.add_relationships(parsed_url.netloc)
            indicators.append(full_ind)
        except ValueError:
            pass

        # Indicators for the path (and the path + query string), skipping
        # empty or bare "/" paths.
        if parsed_url.path and parsed_url.path != "/":
            try:
                path_ind = Indicator(parsed_url.path, "URI - Path")
                path_ind.add_tags(["uri_path", parsed_url.netloc])
                path_ind.add_relationships([url, parsed_url.netloc])
                indicators.append(path_ind)
            except ValueError:
                pass

            # Only make the combined indicator when ? query items exist.
            if parsed_url.query:
                try:
                    uri_path = parsed_url.path + "?" + parsed_url.query
                    query_ind = Indicator(uri_path, "URI - Path")
                    query_ind.add_tags(["uri_path", parsed_url.netloc])
                    query_ind.add_relationships([url, parsed_url.netloc])
                    indicators.append(query_ind)
                except ValueError:
                    pass

    return indicators
 def sha1(self, sha1):
     """Validate and store a SHA-1 hash; store an empty string when invalid."""
     candidate = str(sha1)
     self._sha1 = candidate if RegexHelpers.is_sha1(candidate) else ""
    def _parse_attachment(self, message_part, charset):
        """Extract attachment data and metadata from one e-mail MIME part.

        Scans the part's headers for the word "attachment" and, when
        found, returns a dict with the decoded payload ("data"), its
        "content_type", "size", "md5"/"sha256" hashes, the attachment
        "name", placeholder date fields, and -- for base64-encoded parts
        -- any URLs found in the payload's printable strings
        ("strings_urls").  Returns None when the part is not an
        attachment.  ``charset`` is only used to decode text/html
        payloads before re-encoding them as UTF-8.
        """
        part_items = message_part.items()
        # items() yields (header-name, header-value) tuples; check both
        # elements of each tuple for the "attachment" marker.
        for tup in part_items:
            for value in tup:
                if "attachment" in value:
                    file_data = message_part.get_payload()

                    attachment_dict = {}
                    if message_part.get("Content-Transfer-Encoding",
                                        None) == "base64":
                        file_data_b64 = file_data.replace("\n", "")
                        # For some reason, sometimes the attachments don't have the proper
                        # padding. Add a couple "==" on the end for good measure. This doesn't
                        # seem to harm correctly encoded attachments.
                        file_data_decoded = base64.b64decode(file_data_b64 +
                                                             "==")

                        # Try and get strings out of the attachment.
                        strings_list = RegexHelpers.find_strings(
                            file_data_decoded)
                        strings = " ".join(strings_list)

                        # Look for any URLs that were in the strings.
                        strings_urls = RegexHelpers.find_urls(strings)
                        attachment_dict["strings_urls"] = strings_urls

                    elif message_part.get_content_type() == "text/html":
                        file_data_decoded = message_part.get_payload(
                            decode=True).decode(charset).encode('utf-8')
                    else:
                        file_data_decoded = file_data

                    # Hashing a non-bytes payload raises TypeError; fall
                    # back to an empty hash string in that case.
                    try:
                        md5_hasher = hashlib.md5()
                        md5_hasher.update(file_data_decoded)
                        md5_hash = md5_hasher.hexdigest()
                    except TypeError:
                        md5_hash = ""

                    try:
                        sha256_hasher = hashlib.sha256()
                        sha256_hasher.update(file_data_decoded)
                        sha256_hash = sha256_hasher.hexdigest()
                    except TypeError:
                        sha256_hash = ""

                    attachment_dict["data"] = file_data_decoded
                    attachment_dict[
                        "content_type"] = message_part.get_content_type()
                    attachment_dict["size"] = len(file_data_decoded)
                    attachment_dict["md5"] = md5_hash
                    attachment_dict["sha256"] = sha256_hash
                    attachment_dict["name"] = ""
                    attachment_dict["create_date"] = None
                    attachment_dict["mod_date"] = None
                    attachment_dict["read_date"] = None

                    # Find the attachment name. Normally this follows a specific format
                    # and is called 'filename=' but recently I've seen some that are in
                    # different locations and are just called 'name='... Hence removing
                    # old code and replacing with a regex statement to account for either
                    # name in any location in the message part.
                    attachment_name_pattern = re.compile(
                        r'(file)?name="(.*?)"')
                    for tup in part_items:
                        for item in tup:
                            attachment_name = attachment_name_pattern.search(
                                item)
                            if attachment_name:
                                attachment_dict[
                                    "name"] = RegexHelpers.decode_utf_b64_string(
                                        attachment_name.groups()[1])

                    return attachment_dict

        return None
 def sha256(self, sha256):
     """Validate and store a SHA-256 hash; store an empty string when invalid."""
     candidate = str(sha256)
     self._sha256 = candidate if RegexHelpers.is_sha256(candidate) else ""