def __init__(self, data):
        """Parse a raw e-mail, back it up if configured, and persist it.

        A new ``Mail`` record is created and stamped with the current time.
        Every adapter registered for ``IFieldMapper`` is run against the
        parsed message, then each mail part is handed to the
        ``IContentMapper`` adapter named after the part's type, falling back
        to the adapter named ``'default'``.  If the configured backup
        directory exists, the raw text is written to a uniquely named
        ``.mail`` file there and its absolute path is stored on the record.
        Finally the record is added to a new session and the transaction is
        committed.

        :param data: the complete raw e-mail message as text.
        """
        session = Session()
        try:
            context = Mail()
            context.parsing_date = datetime.now()
            mail = message_from_string(data)

            # Let every registered field mapper fill in its part of the record.
            for _name, field_mapper in component.getAdapters(
                    (context,), interfaces.IFieldMapper):
                field_mapper.parse(mail)

            # Dispatch each part to the content mapper registered under the
            # part's type; unknown types go to the 'default' mapper.
            for part in mail.mailparts:
                content_mapper = component.queryAdapter(
                    context, interface=interfaces.IContentMapper,
                    name=part.type)
                if content_mapper is None:
                    content_mapper = component.getAdapter(
                        context, interface=interfaces.IContentMapper,
                        name='default')
                content_mapper.parse(part)

            backuppath = local_configuration['backup'].get('backup', '')
            if os.path.isdir(backuppath):
                # Minute-resolution timestamp; a numeric suffix is appended
                # until the file name is unused.
                stamp = time.strftime('%Y%m%d%H%M')
                path = os.path.join(backuppath, '%s.mail' % stamp)
                counter = 0
                while os.path.isfile(path):
                    path = os.path.join(
                        backuppath, '%s_%s.mail' % (stamp, counter))
                    counter += 1
                context.original_path = os.path.abspath(path)
                # The context manager closes the file; no explicit close().
                with open(path, 'w') as f:
                    f.write(data)

            # hashlib only accepts bytes on Python 3, so text is encoded
            # first; byte strings are hashed unchanged.
            raw = data if isinstance(data, bytes) else data.encode('utf-8')
            context.hash = hashlib.md5(raw).hexdigest()

            session.add(context)
            transaction.commit()
        finally:
            # Release the session even when parsing or the commit fails.
            session.close()
Example #2
0
    def scan(self, payload: Payload, request_meta: RequestMeta) -> WorkerResponse:
        """Parse an e-mail payload into header/body fields and extracted parts.

        Headers are collected into a dict keyed by lower-cased header name;
        a repeated name has its values joined with newlines.  Unless
        ``self.omit_body`` is set, the text and HTML bodies are stored under
        ``body`` and ``body_html`` (empty string when the part is missing).

        Every non-body mail part becomes an ``ExtractedPayload``.  Parts whose
        type starts with ``message/`` are split into their sub-parts and
        tagged ``dispatch_to=['smtp']``; all others carry their MIME metadata
        and ``dispatch_to=self.always_dispatch``.  Failures while building an
        attachment are recorded in the returned errors instead of raised.

        When ``self.extract_iocs`` is set, the fields named in
        ``self.ioc_keys`` plus every body part are concatenated into one
        additional payload tagged ``dispatch_to=['iocextract']``.
        """
        parsed = pyzmail.message_from_string(
            UnicodeDammit(payload.content).unicode_markup
        )
        fields = {}
        extracted = []
        problems = []
        ioc_text = ''

        # Build the header dict, merging repeated header names.
        for header_name in parsed.keys():
            key = header_name.lower()
            decoded = parsed.get_decoded_header(header_name)
            if key not in fields:
                fields[key] = decoded
            else:
                fields[key] += f'\n{decoded}'

        # Bodies are optional output; missing parts are reported as ''.
        if not self.omit_body:
            if parsed.text_part is None:
                fields['body'] = ''
            else:
                fields['body'] = UnicodeDammit(
                    parsed.text_part.get_payload()
                ).unicode_markup
            if parsed.html_part is None:
                fields['body_html'] = ''
            else:
                fields['body_html'] = UnicodeDammit(
                    parsed.html_part.get_payload()
                ).unicode_markup

        if self.extract_iocs:
            ioc_text += ''.join(
                f'{fields[ioc_key]}\n'
                for ioc_key in self.ioc_keys
                if ioc_key in fields
            )

        for mailpart in parsed.mailparts:
            # Body parts are never attachments; they only feed IOC extraction.
            if mailpart.is_body:
                if self.extract_iocs:
                    ioc_text += UnicodeDammit(mailpart.get_payload()).unicode_markup
                continue

            # An attached message: hand each of its sub-parts back as e-mail.
            if mailpart.type.startswith('message/'):
                for nested in mailpart.part.get_payload():
                    try:
                        nested_meta = PayloadMeta(
                            should_archive=self.archive_attachments,
                            extra_data={'attached_msg': True},
                            dispatch_to=['smtp'],
                        )
                        extracted.append(
                            ExtractedPayload(nested.as_bytes(), nested_meta)
                        )
                    except Exception as err:
                        problems.append(f'Failed extracting attachment: {err}')
                continue

            # A regular attachment.
            try:
                att_filename = mailpart.filename or mailpart.sanitized_filename
                details = {
                    'charset': mailpart.charset,
                    'content-description': mailpart.part.get('Content-Description'),
                    'content-id': mailpart.content_id,
                    'disposition': mailpart.disposition,
                    'filename': att_filename,
                    'type': mailpart.type,
                }
                part_meta = PayloadMeta(
                    should_archive=self.archive_attachments,
                    extra_data=details,
                    dispatch_to=self.always_dispatch,
                )
                extracted.append(ExtractedPayload(mailpart.get_payload(), part_meta))
            except Exception as err:
                problems.append(f'Failed extracting attachment: {err}')

        if self.extract_iocs:
            ioc_meta = PayloadMeta(should_archive=False, dispatch_to=['iocextract'])
            extracted.append(ExtractedPayload(ioc_text.encode(), ioc_meta))

        return WorkerResponse(fields, errors=problems, extracted=extracted)
    def scan(self, payload, **kwargs):
        """Parse the e-mail sessions carved from *payload* into a dict.

        For each carved session the headers are stored under their
        lower-cased names (repeated names joined with newlines), the text and
        HTML bodies under ``body`` / ``body_html``, and attachment results
        under ``att``.  When ``self.extract_iocs`` is set, IPv4 addresses
        found in selected headers and URLs found in the bodies are added as
        ``ips`` and ``urls``.  When ``self.use_bloom`` is set, each configured
        bloom-filter field present in the result gets a ``<field>_isnew``
        flag.

        :param payload: raw content containing zero or more e-mail sessions.
        :param kwargs: must contain ``filename`` (passed to
            ``self.vortex_metadata``); may contain ``uuid``, which is
            forwarded to ``self.handle_attachments``.
        :returns: the dict built for the LAST carved session, or an empty
            dict when no session was carved.
        """
        # Grab the uuid so we can pass it off to the attachment
        uuid = kwargs.get('uuid', [self.stoq.get_uuid])

        payload = self.stoq.force_unicode(payload)
        email_sessions = self.carve_email(payload)

        # Get the appropriate metadata from the vortex filename
        vortex_meta = self.vortex_metadata(kwargs['filename'])

        # Headers we want to extract IP addresses from
        ip_headers = ('src_ip',
                      'dest_ip',
                      'received',
                      'x-orig-ip',
                      'x-originating-ip',
                      'x-remote-ip',
                      'x-sender-ip')

        # Bound up front so the bloom check and the return below stay valid
        # when no e-mail session could be carved from the payload.
        message_json = {}

        # Iterate over each e-mail session
        for email_session in email_sessions:
            message_json = {}
            message = pyzmail.message_from_string(email_session)

            if vortex_meta:
                # Start from a copy of the vortex metadata: the header merge
                # below appends to existing keys, and sharing one dict would
                # leak values from one session into the next.
                message_json = dict(vortex_meta)
                message_json['vortex_filename'] = kwargs['filename']

            # Create a dict of the headers in the session
            for header, _value in message.items():
                curr_header = header.lower()
                decoded = message.get_decoded_header(header)
                if curr_header in message_json:
                    # If the header key already exists, let's join them
                    message_json[curr_header] += "\n{}".format(decoded)
                else:
                    message_json[curr_header] = decoded

            # Extract the e-mail body, to include HTML if available
            if message.text_part is not None:
                message_json['body'] = self.stoq.force_unicode(
                    message.text_part.get_payload())
            else:
                message_json['body'] = ""

            if message.html_part is not None:
                message_json['body_html'] = self.stoq.force_unicode(
                    message.html_part.get_payload())
            else:
                message_json['body_html'] = ""

            # Make this easy, merge both text and html body within e-mail
            # for the purpose of extracting any URIs
            email_body = "{}{}".format(message_json['body'],
                                       message_json['body_html'])

            # Extract and normalize any IP addresses in headers
            if self.extract_iocs:
                # concat all of our headers into one string for easy searching
                concat_ips = "".join(message_json[ip_header]
                                     for ip_header in ip_headers
                                     if ip_header in message_json)

                extracted_ips = self.readers['iocregex'].read(
                    concat_ips, datatype_flag='ipv4')
                if 'ipv4' in extracted_ips:
                    message_json['ips'] = extracted_ips['ipv4']

                # Extract any URLs that may be in the merged body
                extracted_urls = self.readers['iocregex'].read(
                    email_body, datatype_flag='url')
                if 'url' in extracted_urls:
                    message_json['urls'] = extracted_urls['url']

            # Handle attachments
            message_json['att'] = []
            for mailpart in message.mailparts:
                try:
                    filename = mailpart.filename
                except TypeError:
                    filename = "None"

                # Set to True when the part is consumed here (plain text
                # appended to the body, or a winmail.dat whose embedded files
                # were handled) and must not be handled as a normal attachment.
                skip_attachment = False

                if mailpart.type == "text/plain":
                    try:
                        message_json['body'] += self.stoq.force_unicode(
                            mailpart.get_payload())
                    except Exception:
                        # Best effort: an undecodable text part is skipped.
                        pass
                    skip_attachment = True
                elif filename == "winmail.dat":
                    tnef_results = TNEF(mailpart.get_payload())

                    # we have data, let's handle it.
                    if tnef_results.attachments:
                        # We have a valid file within winmail.dat,
                        # let's make sure we only handle it here.
                        skip_attachment = True
                        for tnef_attachment in tnef_results.attachments:
                            try:
                                filename = self.stoq.force_unicode(
                                    tnef_attachment.name)
                            except Exception:
                                filename = "None"

                            try:
                                # Fall back to the session uuid when the
                                # message carries no 'uuid' entry; a plain
                                # lookup raised KeyError, which was swallowed
                                # and silently dropped the attachment.
                                attachment_json = self.handle_attachments(
                                    payload=tnef_attachment.data,
                                    filename=filename,
                                    uuid=message_json.get('uuid', uuid))
                                if attachment_json:
                                    message_json['att'].append(attachment_json)
                            except Exception:
                                # Best effort: one bad embedded file must not
                                # abort the rest of the message.
                                pass

                # Let's handle the attachment normally
                if not skip_attachment:
                    attachment_json = self.handle_attachments(
                        payload=mailpart.get_payload(),
                        filename=filename,
                        uuid=uuid)
                    if attachment_json:
                        attachment_json['desc'] = mailpart.part.get(
                            'Content-Description')
                        attachment_json['type'] = mailpart.type
                        message_json['att'].append(attachment_json)

        # NOTE(review): this runs once, after the loop, so only the last
        # session's fields are checked against the bloom filters.
        if self.use_bloom:
            # Check bloom filters
            for field_name, field_bloom in self.bloomfilters.items():

                # If the configured field name exists in parsed data...
                if field_name in message_json:

                    # extract the field value and check if it has been seen
                    # before (unseen values are added to the filter)...
                    seen_before = field_bloom.query_filter(
                        message_json[field_name], add_missing=True)

                    # flag whether the value is new within JSON
                    message_json["{}_isnew".format(field_name)] = not seen_before

        return message_json
    def scan(self, payload, **kwargs):
        """Yield one parsed-message dict per e-mail session in *payload*.

        This is a generator.  For each carved session the headers are stored
        under their lower-cased names (repeated names joined with newlines),
        the text and HTML bodies under ``body`` / ``body_html``, and
        attachment results under ``att``.  When ``self.extract_iocs`` is set,
        IPv4 addresses found in selected headers and URLs found in the bodies
        are added as ``ips`` and ``urls``.  When ``self.use_bloom`` is set,
        each configured bloom-filter field present in the result gets a
        ``<field>_isnew`` flag.  When ``self.omit_body`` is set, the body keys
        are removed before the dict is yielded.

        Nothing is yielded when *payload* is empty (a warning is logged) or
        when ``self.vortex_metadata`` returns ``False``.  Because this is a
        generator, the ``False`` / ``True`` returned in those two cases is
        only visible as the ``StopIteration`` value, not to plain iteration.

        :param payload: raw content containing zero or more e-mail sessions.
        :param kwargs: must contain ``filename`` (passed to
            ``self.vortex_metadata``); may contain ``uuid``, which is
            forwarded to ``self.handle_attachments``.
        """
        if not payload:
            self.log.warn(
                "SMTP session is empty. Do you have permission to the source?")
            return False

        # Grab the uuid so we can pass it off to the attachment
        uuid = kwargs.get('uuid', [self.stoq.get_uuid])

        # Get the appropriate metadata from the vortex filename
        vortex_meta = self.vortex_metadata(kwargs['filename'])

        # If vortex_meta returns False, it means the payload being analyzed is
        # the client session, which contains useless information. Let's just
        # skip it.
        if vortex_meta is False:
            self.log.debug("Vortex client sessions provided, skipping...")
            return True

        # Headers we want to extract IP addresses from
        ip_headers = (
            'src_ip', 'dest_ip', 'received', 'x-orig-ip',
            'x-originating-ip', 'x-remote-ip', 'x-sender-ip'
        )

        # Iterate over each e-mail session
        for email_session in self.carve_email(payload):
            email_session = self.stoq.force_unicode(email_session)
            message_json = {}
            message = pyzmail.message_from_string(email_session)

            if vortex_meta:
                # Work on a copy so sessions never share header values.
                message_json = vortex_meta.copy()
                message_json['vortex_filename'] = kwargs['filename']

            # Create a dict of the headers in the session
            for header, _value in message.items():
                curr_header = header.lower()
                decoded = message.get_decoded_header(header)
                if curr_header in message_json:
                    # If the header key already exists, let's join them
                    message_json[curr_header] += "\n{}".format(decoded)
                else:
                    message_json[curr_header] = decoded

            # Extract the e-mail body, to include HTML if available
            if message.text_part is not None:
                message_json['body'] = self.stoq.force_unicode(
                    message.text_part.get_payload())
            else:
                message_json['body'] = ""

            if message.html_part is not None:
                message_json['body_html'] = self.stoq.force_unicode(
                    message.html_part.get_payload())
            else:
                message_json['body_html'] = ""

            # Make this easy, merge both text and html body within e-mail
            # for the purpose of extracting any URIs
            email_body = "{}{}".format(message_json['body'],
                                       message_json['body_html'])

            # Extract and normalize any IP addresses in headers
            if self.extract_iocs:
                # concat all of our headers into one string for easy searching
                concat_ips = "".join(message_json[ip_header]
                                     for ip_header in ip_headers
                                     if ip_header in message_json)

                extracted_ips = self.readers['iocregex'].read(
                    concat_ips, datatype_flag='ipv4')
                if 'ipv4' in extracted_ips:
                    message_json['ips'] = extracted_ips['ipv4']

                # Extract any URLs that may be in the merged body
                extracted_urls = self.readers['iocregex'].read(
                    email_body, datatype_flag='url')
                if 'url' in extracted_urls:
                    message_json['urls'] = extracted_urls['url']

            # Handle attachments
            message_json['att'] = []
            for mailpart in message.mailparts:
                try:
                    filename = mailpart.filename
                except TypeError:
                    filename = "None"

                # Set to True when the part is consumed here (plain text
                # appended to the body, or a winmail.dat whose embedded files
                # were handled) and must not be handled as a normal attachment.
                skip_attachment = False

                if mailpart.type == "text/plain":
                    try:
                        message_json['body'] += self.stoq.force_unicode(
                            mailpart.get_payload())
                    except Exception as err:
                        # Best effort: an undecodable text part is skipped,
                        # but leave a trace instead of failing silently.
                        self.log.debug(
                            "Unable to append text/plain part to body: "
                            "{}".format(err))
                    skip_attachment = True
                elif filename == "winmail.dat":
                    tnef_results = TNEF(mailpart.get_payload())

                    # we have data, let's handle it.
                    if tnef_results.attachments:
                        # We have a valid file within winmail.dat,
                        # let's make sure we only handle it here.
                        skip_attachment = True
                        for tnef_attachment in tnef_results.attachments:
                            try:
                                filename = self.stoq.force_unicode(
                                    tnef_attachment.name)
                            except Exception:
                                filename = "None"

                            try:
                                # Fall back to the session uuid when the
                                # message carries no 'uuid' entry; a plain
                                # lookup raised KeyError, which was swallowed
                                # and silently dropped the attachment.
                                attachment_json = self.handle_attachments(
                                    payload=tnef_attachment.data,
                                    filename=filename,
                                    uuid=message_json.get('uuid', uuid))
                                if attachment_json:
                                    message_json['att'].append(
                                        attachment_json)
                            except Exception as err:
                                # Best effort: one bad embedded file must not
                                # abort the rest of the message.
                                self.log.debug(
                                    "Unable to handle winmail.dat "
                                    "attachment: {}".format(err))

                # Let's handle the attachment normally
                if not skip_attachment:
                    attachment_json = self.handle_attachments(
                        payload=mailpart.get_payload(),
                        filename=filename,
                        uuid=uuid)
                    if attachment_json:
                        attachment_json['desc'] = mailpart.part.get(
                            'Content-Description')
                        attachment_json['type'] = mailpart.type
                        message_json['att'].append(attachment_json)

            if self.use_bloom:
                # Check bloom filters
                for field_name, field_bloom in self.bloomfilters.items():

                    # If the configured field name exists in parsed data...
                    if field_name in message_json:

                        # extract the field value and check if it has been
                        # seen before (unseen values are added to the
                        # filter)...
                        seen_before = field_bloom.query_filter(
                            message_json[field_name], add_missing=True)

                        # flag whether the value is new within JSON
                        message_json["{}_isnew".format(field_name)] = (
                            not seen_before)

            # Make sure we delete the body and body_html keys if they are to
            # be omitted
            if self.omit_body:
                message_json.pop('body', None)
                message_json.pop('body_html', None)

            yield message_json