Esempio n. 1
0
    def _search_phishing(self, greedy_data):
        with_urls = False

        # If mail is filtered don't check for phishing
        is_filtered = greedy_data["tokenizer"][2]

        if is_filtered:
            return False, False, False

        # Reset phishing bitmap
        self._pb.reset_score()

        # Outputs
        targets = set()

        # Get all data
        mail = greedy_data["tokenizer"][1]
        body = mail.get('body')
        subject = mail.get('subject')
        from_ = mail.get('from')
        urls_body = greedy_data["urls-handler-body"][2]
        urls_attachments = greedy_data["urls-handler-attachments"][2]

        # TODO: if an attachment is filtered, the score is not complete
        # more different mails can have the same attachment
        # more different attachments can have the same mail
        attachments = MailAttachments(greedy_data["attachments"][2])

        urls = (
            (urls_body, 'urls_body'),
            (urls_attachments, 'urls_attachments'))

        # Mapping for targets checks
        mapping_targets = (
            (body, 'mail_body'),
            (from_, 'mail_from'),
            (attachments.payloadstext(), 'text_attachments'),
            (attachments.filenamestext(), 'filename_attachments'))

        for k, v in mapping_targets:
            if k:
                matcher = partial(search_words_given_key, k)
                t = set(i for i in map(matcher, self._t_keys.iteritems()) if i)
                if t:
                    targets |= t
                    self._pb.set_property_score(v)

        # Check urls
        # Target not added because urls come already analyzed text
        for k, v in urls:
            if k:
                with_urls = True
                if any(check_urls(k, i) for i in self._t_keys.values()):
                    self._pb.set_property_score(v)

        # Check subject
        if swt(subject, self._s_keys):
            self._pb.set_property_score("mail_subject")

        return self._pb.score, list(targets), with_urls
Esempio n. 2
0
class Attachments(AbstractBolt):
    outputs = ['sha256_random', 'with_attachments', 'attachments']

    def initialize(self, stormconf, context):
        """Run the parent initialisation, then create the attachments
        object and load its settings."""
        super(Attachments, self).initialize(stormconf, context)

        # The attachments object must exist before the settings are
        # loaded, because loading applies them onto it.
        self.attach = MailAttachments()
        self._load_settings()

    def _load_settings(self):
        """Rebuild the settings and hand them to the attachments object."""
        # Refresh the content types lists first
        self._load_lists()

        # Work on a deep copy so the bolt configuration is left untouched
        settings = copy.deepcopy(self.conf)
        settings["filter_cont_types"] = self._filter_cont_types
        settings["tika_whitelist_cont_types"] = (
            self._tika_whitelist_cont_types)

        self.attach.reload(**settings)

    def _load_lists(self):
        """Reload the content types lists named by the configuration."""
        # Content types that must be filtered
        self._filter_cont_types = load_keywords_list(
            self.conf["content_types_blacklist"], lower=False)
        self.log("Content types to filter reloaded")

        # Content types analysed by Tika: stays empty when Tika is disabled
        self._tika_whitelist_cont_types = set()
        tika_conf = self.conf["tika"]
        if not tika_conf["enabled"]:
            return

        self._tika_whitelist_cont_types = load_keywords_list(
            tika_conf["valid_content_types"], lower=False)
        self.log("Whitelist Tika content types reloaded")

    def process_tick(self, freq):
        """Reload the settings on every tick, after the parent class
        handled it."""
        super(Attachments, self).process_tick(freq)

        # Pick up any change made to the lists since the last tick
        self._load_settings()

    def process(self, tup):
        try:
            sha256_random = tup.values[0]
            with_attachments = tup.values[1]

            # Remove all values
            self.attach.removeall()

            # Add the new values
            self.attach.extend(tup.values[2])

            # Run analysis
            # self.attach.run() == self.attach()
            self.attach.run()

        except Error, e:
            self.raise_exception(e, tup)

        else:
Esempio n. 3
0
    def process(self, tup):
        """Build the mail from ``tup``, apply the enabled filters and emit
        the results on the network, mail, body and attachments streams.

        The network filter and the mail filter are tracked with two
        separate flags: the "network" stream carries the network flag,
        while the "mail" and "body" streams carry the mail flag.

        When a ``TypeError`` or ``UnicodeDecodeError`` is raised while
        preparing the data, nothing is emitted.
        """
        try:
            sha256_rand, mail = self._make_mail(tup)
            with_attachments = False
            attachments = []
            body = self.parser.body

            # If filter network is enabled
            is_filtered_net = False
            if self.filter_network_enabled:
                is_filtered_net = (
                    mail["sender_ip"] in self._network_analyzed)

                # Update database of ip addresses analyzed; done on every
                # mail so later mails from the same ip are flagged
                self._network_analyzed.append(mail["sender_ip"])

            # If filter mails is enabled
            is_filtered_mail = False
            if self.filter_mails_enabled:
                if mail["sha1"] in self._mails_analyzed:
                    # Already analyzed: drop the body everywhere
                    mail.pop("body", None)
                    body = six.text_type()
                    is_filtered_mail = True

                # Update database of mails analyzed
                self._mails_analyzed.append(mail["sha1"])

            # Emit only attachments
            raw_attach = self.parser.attachments_list

            if raw_attach:
                with_attachments = True
                attachments = MailAttachments.withhashes(raw_attach)

                # If filter attachments is enabled
                if self.filter_attachments_enabled:
                    hashes = attachments.filter(self._attachments_analyzed)
                    self._attachments_analyzed.extend(hashes)

        except (TypeError, UnicodeDecodeError):
            # NOTE(review): the mail is silently dropped here, nothing is
            # logged or emitted -- confirm this best-effort is intended.
            pass

        else:
            # Emit network
            self.emit([sha256_rand, mail["sender_ip"], is_filtered_net],
                      stream="network")

            # Emit mail
            self.emit([sha256_rand, mail, is_filtered_mail], stream="mail")

            # Emit body
            self.emit([sha256_rand, body, is_filtered_mail], stream="body")

            # Emit attachments
            self.emit([sha256_rand, with_attachments,
                       list(attachments)],
                      stream="attachments")
Esempio n. 4
0
class Attachments(AbstractBolt):
    outputs = ['sha256_random', 'with_attachments', 'attachments']

    def initialize(self, stormconf, context):
        """Run the parent initialisation, then create the attachments
        object and load its settings."""
        super(Attachments, self).initialize(stormconf, context)

        # The attachments object must exist before the settings are
        # loaded, because loading applies them onto it.
        self.attach = MailAttachments()
        self._load_settings()

    def _load_settings(self):
        """Load the lists and apply the resulting settings to the
        attachments object."""
        self.attach.reload(**self._load_lists())

    def _load_lists(self):
        """Return a deep copy of the configuration with its lists loaded.

        For every section of ``self.conf``, each ``name: value`` pair of
        its optional ``"lists"`` mapping is passed to
        ``load_keywords_list`` and the result is stored in the copy as
        ``settings[section][name]``. ``self.conf`` itself is not changed.
        """
        settings = copy.deepcopy(self.conf)

        for section in self.conf:
            lists = self.conf[section].get("lists", {})

            for name, source in lists.items():
                keywords = load_keywords_list(source)
                settings[section][name] = keywords

                self.log(
                    "Loaded lists {!r} for {!r}".format(name, section),
                    "debug")
                self.log(
                    "Keys[{!r}][{!r}]: {}".format(
                        section, name, ", ".join(keywords)),
                    "debug")

        return settings

    def process_tick(self, freq):
        """Reload the settings on every tick, after the parent class
        handled it."""
        super(Attachments, self).process_tick(freq)

        # Pick up any change made to the lists since the last tick
        self._load_settings()

    def process(self, tup):
        try:
            sha256_random = tup.values[0]
            sha256 = sha256_random.split("_")[0]
            self.log("Processing started: {}".format(sha256))
            with_attachments = tup.values[1]

            # Remove all values
            self.attach.removeall()

            # Add the new values
            self.attach.extend(tup.values[2])

            # Run analysis
            # self.attach.run() == self.attach()
            self.attach.run()

        except BinAsciiError, e:
            self.raise_exception(e, tup)

        else:
Esempio n. 5
0
    def _get_urls(self, greedy_data):
        """Collect the whitelisted urls of the body and of the attachments.

        Returns a dict that holds a ``"body"`` and/or an ``"attachments"``
        key only when the matching search gave a truthy result. The body
        is skipped when the tokenizer entry flags the mail as filtered.
        """
        results = {}

        # If mail is filtered don't check the body for urls
        if not greedy_data["tokenizer"][2]:
            body_urls = text2urls_whitelisted(
                greedy_data["tokenizer"][1], self.whitelists, self.faup)
            if body_urls:
                results["body"] = body_urls

        # Attachments are always checked: two mails can have the same
        # body but different attachments
        payloads = MailAttachments(greedy_data["attachments"][2])
        attachments_urls = text2urls_whitelisted(
            payloads.payloadstext(), self.whitelists, self.faup)
        if attachments_urls:
            results["attachments"] = attachments_urls

        return results
Esempio n. 6
0
    def process(self, tup):
        """Build the mail from ``tup`` and apply the enabled network, mail
        and attachments filters.

        A ``TypeError`` raised while doing so is handed to
        ``self.raise_exception`` together with the tuple.

        NOTE(review): this excerpt ends after the ``except`` handler; the
        values prepared here (``raw_mail``, ``mail_type``, the filter
        flags, ``attachments``...) are presumably used by code that
        follows -- confirm against the full file.
        """
        try:
            sha256_rand, mail = self._make_mail(tup)
            # Only the part before the first "_" is logged
            sha256 = sha256_rand.split("_")[0]
            self.log("Processing started: {}".format(sha256))
            with_attachments = False
            attachments = []
            body = self.parser.body
            raw_mail = tup.values[0]
            mail_type = tup.values[5]

            # If filter network is enabled
            is_filtered_net = False
            if self.filter_network:
                if mail["sender_ip"] in self.analyzed_network:
                    is_filtered_net = True

                # Update database of ip addresses analyzed (done for every
                # mail, already seen or not)
                self.analyzed_network.append(mail["sender_ip"])

            # If filter mails is enabled
            is_filtered_mail = False
            if self.filter_mails:
                if mail["sha1"] in self.analyzed_mails:
                    # Already analyzed: drop the body and blank both the
                    # body and the raw mail
                    mail.pop("body", None)
                    body = six.text_type()
                    raw_mail = six.text_type()
                    is_filtered_mail = True

                # Update database of mails analyzed (done for every mail,
                # already seen or not)
                self.analyzed_mails.append(mail["sha1"])

            if self.parser.attachments:
                with_attachments = True
                attachments = MailAttachments.withhashes(
                    self.parser.attachments)

                # If filter attachments is enabled
                if self.filter_attachments:
                    # Whatever filter() returns is added to the analyzed
                    # attachments
                    hashes = attachments.filter(self.analyzed_attachments)
                    self.analyzed_attachments.extend(hashes)

        except TypeError, e:
            self.raise_exception(e, tup)
Esempio n. 7
0
    def process(self, tup):
        """Build the mail from ``tup`` and apply the enabled network, mail
        and attachments filters.

        A ``TypeError`` raised while doing so is handed to
        ``self.raise_exception`` together with the tuple.

        NOTE(review): this excerpt ends after the ``except`` handler; the
        values prepared here are presumably emitted by code that follows
        -- confirm against the full file.
        """
        try:
            sha256_rand, mail = self._make_mail(tup)
            with_attachments = False
            attachments = []
            body = self.parser.body

            # If filter network is enabled
            is_filtered = False
            if self.filter_network_enabled:
                if mail["sender_ip"] in self._network_analyzed:
                    is_filtered = True

                # Update database of ip addresses analyzed (done for every
                # mail, already seen or not)
                self._network_analyzed.append(mail["sender_ip"])

            # If filter mails is enabled
            # NOTE(review): this reset discards the network filter result
            # computed just above; the two filters look like they need
            # separate flags -- confirm and fix together with the code
            # that consumes ``is_filtered``.
            is_filtered = False
            if self.filter_mails_enabled:
                if mail["sha1"] in self._mails_analyzed:
                    # Already analyzed: drop the body and blank it
                    mail.pop("body", None)
                    body = six.text_type()
                    is_filtered = True

                # Update database of mails analyzed (done for every mail,
                # already seen or not)
                self._mails_analyzed.append(mail["sha1"])

            # Emit only attachments
            raw_attach = self.parser.attachments_list

            if raw_attach:
                with_attachments = True
                attachments = MailAttachments.withhashes(raw_attach)

                # If filter attachments is enabled
                if self.filter_attachments_enabled:
                    # Whatever filter() returns is added to the analyzed
                    # attachments
                    hashes = attachments.filter(self._attachments_analyzed)
                    self._attachments_analyzed.extend(hashes)

        except TypeError, e:
            self.raise_exception(e, tup)
 def process(self, tup):
     """Extract the urls from the text of the attachments carried by
     ``tup`` and emit them together with the mail identifier."""
     sha256_mail_random = tup.values[0]

     # Text of all the attachments payloads
     payloads_text = MailAttachments(tup.values[2]).payloadstext()

     with_urls, urls = self._extract_urls(payloads_text)
     self.emit([sha256_mail_random, with_urls, urls])
Esempio n. 9
0
 def initialize(self, stormconf, context):
     """Run the parent initialisation, then create the attachments
     object and load its settings."""
     super(Attachments, self).initialize(stormconf, context)

     # The attachments object is created before the settings are loaded
     self.attach = MailAttachments()
     self._load_settings()