def __init__(self, data):
    """Parse a raw RFC 2822 message string and persist it as a Mail record.

    The message is run through all registered IFieldMapper adapters (header
    fields) and a per-MIME-type IContentMapper adapter for each part, then
    optionally backed up to disk and committed to the database.

    :param data: the raw e-mail message as a single string
    """
    session = Session()
    context = Mail()
    context.parsing_date = datetime.now()
    mail = message_from_string(data)
    # Populate the Mail record's fields: every registered field-mapper
    # adapter gets a chance to parse the message headers.
    for name, adapter in component.getAdapters((context,), interfaces.IFieldMapper):
        adapter.parse(mail)
    # Map each MIME part through the content mapper registered under the
    # part's MIME type, falling back to the 'default' mapper when no
    # type-specific one exists.
    for part in mail.mailparts:
        # renamed from 'type' so the builtin is not shadowed
        content_type = part.type
        adapter = component.queryAdapter(
            context, interface=interfaces.IContentMapper, name=content_type)
        if adapter is None:
            adapter = component.getAdapter(
                context, interface=interfaces.IContentMapper, name='default')
        adapter.parse(part)
    # Optionally write a backup copy of the raw message, using a
    # minute-resolution timestamp plus a counter to avoid clobbering
    # an existing file.
    backuppath = local_configuration['backup'].get('backup', '')
    if os.path.isdir(backuppath):
        name = time.strftime('%Y%m%d%H%M')
        path = os.path.join(backuppath, '%s.mail' % name)
        counter = 0
        while os.path.isfile(path):
            path = os.path.join(backuppath, '%s_%s.mail' % (name, counter,))
            counter += 1
        context.original_path = os.path.abspath(path)
        with open(path, 'w') as f:
            f.write(data)
            # fixed: dropped the redundant f.close(); the with-block
            # already closes the file on exit.
    # NOTE(review): hashlib.md5 requires bytes on Python 3 — this works only
    # if 'data' arrives as bytes there (or this module still targets
    # Python 2). Confirm against the caller before porting.
    context.hash = hashlib.md5(data).hexdigest()
    session.add(context)
    transaction.commit()
    session.close()
def scan(self, payload: Payload, request_meta: RequestMeta) -> WorkerResponse:
    """Decode an SMTP session into headers, bodies, and extracted attachments.

    Headers are folded into a dict (repeated header names are newline-joined),
    bodies are included unless ``self.omit_body`` is set, attached messages and
    files become ExtractedPayload children, and (optionally) header/body text
    is forwarded to the iocextract plugin.
    """
    results = {}
    extracted = []
    errs = []
    ioc_text = ''

    markup = UnicodeDammit(payload.content).unicode_markup
    msg = pyzmail.message_from_string(markup)

    # Fold the SMTP headers into a dict, newline-joining duplicates
    for hdr in msg.keys():
        key = hdr.lower()
        decoded = msg.get_decoded_header(hdr)
        if key in results:
            results[key] = f'{results[key]}\n{decoded}'
        else:
            results[key] = decoded

    if not self.omit_body:
        # Include both the plain-text and HTML bodies when present
        text_part = msg.text_part
        html_part = msg.html_part
        results['body'] = (
            UnicodeDammit(text_part.get_payload()).unicode_markup
            if text_part is not None
            else ''
        )
        results['body_html'] = (
            UnicodeDammit(html_part.get_payload()).unicode_markup
            if html_part is not None
            else ''
        )

    if self.extract_iocs:
        # Seed the IOC buffer with the configured header values
        ioc_text = ''.join(
            f'{results[k]}\n' for k in self.ioc_keys if k in results
        )

    # Walk the MIME parts and turn non-body parts into extracted payloads
    for part in msg.mailparts:
        if part.is_body:
            # Body parts are not attachments; optionally feed them to
            # IOC extraction as well
            if self.extract_iocs:
                ioc_text += UnicodeDammit(part.get_payload()).unicode_markup
            continue
        if part.type.startswith('message/'):
            # An attached e-mail: re-dispatch each sub-message to this plugin
            for sub in part.part.get_payload():
                try:
                    meta = PayloadMeta(
                        should_archive=self.archive_attachments,
                        extra_data={'attached_msg': True},
                        dispatch_to=['smtp'],
                    )
                    extracted.append(ExtractedPayload(sub.as_bytes(), meta))
                except Exception as err:
                    errs.append(f'Failed extracting attachment: {err}')
            continue
        # Ordinary file attachment
        try:
            name = part.filename or part.sanitized_filename
            meta = PayloadMeta(
                should_archive=self.archive_attachments,
                extra_data={
                    'charset': part.charset,
                    'content-description': part.part.get('Content-Description'),
                    'content-id': part.content_id,
                    'disposition': part.disposition,
                    'filename': name,
                    'type': part.type,
                },
                dispatch_to=self.always_dispatch,
            )
            extracted.append(ExtractedPayload(part.get_payload(), meta))
        except Exception as err:
            errs.append(f'Failed extracting attachment: {err}')

    if self.extract_iocs:
        # Ship the accumulated header/body text off for IOC extraction
        ioc_meta = PayloadMeta(should_archive=False, dispatch_to=['iocextract'])
        extracted.append(ExtractedPayload(ioc_text.encode(), ioc_meta))

    return WorkerResponse(results, errors=errs, extracted=extracted)
def scan(self, payload, **kwargs):
    """Parse carved SMTP sessions into a JSON-able dict of headers, bodies,
    extracted IOCs, and handled attachments.

    The payload is split into e-mail sessions via ``self.carve_email``;
    headers are lower-cased and newline-joined on collision, bodies are
    merged for URL extraction, winmail.dat (TNEF) containers are unpacked,
    and each attachment is passed to ``self.handle_attachments``.

    NOTE(review): the final ``return`` sits inside the session loop, so only
    the first carved session is returned — confirm this is intentional.
    """
    extracted_urls = None
    extracted_ips = None
    # Grab the uuid of so we can pass it off to the attachment
    # NOTE(review): self.stoq.get_uuid is not called here — if it is a
    # method rather than a property, this stores the bound method in a
    # list. Confirm against the stoq framework.
    uuid = kwargs.get('uuid', [self.stoq.get_uuid])
    payload = self.stoq.force_unicode(payload)
    email_sessions = self.carve_email(payload)
    # Get the appropriate metadata from the vortex filename
    vortex_meta = self.vortex_metadata(kwargs['filename'])
    # Iterate over each e-mail session
    for email_session in email_sessions:
        message_json = {}
        message = pyzmail.message_from_string(email_session)
        if vortex_meta:
            # Setup our primary message json blob
            # NOTE(review): vortex_meta is assigned, not copied, so header
            # parsing below mutates it across loop iterations — compare
            # with the .copy() used in the generator variant of scan().
            message_json = vortex_meta
            message_json['vortex_filename'] = kwargs['filename']
        # Create a dict of the headers in the session
        for k, v in list(message.items()):
            curr_header = k.lower()
            if curr_header in message_json:
                # If the header key already exists, let's join them
                message_json[curr_header] += "\n{}".format(message.get_decoded_header(k))
            else:
                message_json[curr_header] = message.get_decoded_header(k)
        # Extract the e-mail body, to include HTML if available
        if message.text_part is not None:
            message_json['body'] = self.stoq.force_unicode(
                message.text_part.get_payload())
        else:
            message_json['body'] = ""
        if message.html_part is not None:
            message_json['body_html'] = self.stoq.force_unicode(
                message.html_part.get_payload())
        else:
            message_json['body_html'] = ""
        # Make this easy, merge both text and html body within e-mail
        # for the purpose of extracting any URIs
        email_body = "{}{}".format(message_json['body'], message_json['body_html'])
        # Extract and normalize any IP addresses in headers
        if self.extract_iocs:
            # str of concatenated ip_headers
            concat_ips = ""
            # Define which headers we want to extract IP addresses from
            ip_headers = ['src_ip', 'dest_ip', 'received', 'x-orig-ip',
                          'x-originating-ip', 'x-remote-ip', 'x-sender-ip']
            # concat all of our headers into one string for easy searching
            for ip_header in ip_headers:
                if ip_header in message_json:
                    concat_ips += message_json[ip_header]
            extracted_ips = self.readers['iocregex'].read(concat_ips, datatype_flag='ipv4')
            # Let's get a unique list of IP addresses from extracted data
            if 'ipv4' in extracted_ips:
                message_json['ips'] = extracted_ips['ipv4']
            # extract and normalize any URLs found
            extracted_urls = self.readers['iocregex'].read(email_body, datatype_flag='url')
            # Extract any URLs that may be in the merged body
            if 'url' in extracted_urls:
                message_json['urls'] = extracted_urls['url']
        # Handle attachments
        message_json['att'] = []
        for mailpart in message.mailparts:
            try:
                filename = mailpart.filename
            except TypeError:
                filename = "None"
            # This is a check for winmail.dat files. If successful,
            # skip_attachment will be True and we will use the
            # results from that instead of winmail.dat file itself.
            skip_attachment = False
            if mailpart.type == "text/plain":
                # Fold text/plain parts into the body rather than treating
                # them as attachments; decode errors are deliberately
                # best-effort (swallowed).
                try:
                    message_json['body'] += self.stoq.force_unicode(mailpart.get_payload())
                except:
                    pass
                skip_attachment = True
            else:
                if filename == "winmail.dat":
                    tnef_results = TNEF(mailpart.get_payload())
                    # we have data, let's handle it.
                    if tnef_results.attachments:
                        # We have a valid file within winmail.dat,
                        # let's make sure we only handle it here.
                        skip_attachment = True
                        for tnef_attachment in tnef_results.attachments:
                            try:
                                filename = self.stoq.force_unicode(tnef_attachment.name)
                            except:
                                filename = "None"
                            # NOTE(review): uuid=message_json['uuid'] may
                            # KeyError if no 'uuid' header/meta was parsed;
                            # the bare except silently drops the attachment
                            # in that case — confirm this is intended.
                            try:
                                attachment_json = self.handle_attachments(payload=tnef_attachment.data, filename=filename, uuid=message_json['uuid'])
                                if attachment_json:
                                    message_json['att'].append(attachment_json)
                            except:
                                pass
            # Let's handle the attachment normally
            if not skip_attachment:
                attachment_json = self.handle_attachments(payload=mailpart.get_payload(), filename=filename, uuid=uuid)
                if attachment_json:
                    attachment_json['desc'] = mailpart.part.get('Content-Description')
                    attachment_json['type'] = mailpart.type
                    message_json['att'].append(attachment_json)
        if self.use_bloom:
            # Check bloom filters
            for field_name, field_bloom in self.bloomfilters.items():
                # If the configured field name exists in parsed data...
                if field_name in message_json:
                    # extract the field value and check if it has been seen
                    # before...
                    field_value = message_json[field_name]
                    seen_before = field_bloom.query_filter(
                        field_value, add_missing=True)
                    # Generate JSON entry key for flagging new field values
                    field_flag = "{}_isnew".format(field_name)
                    # if the value has not been seen before...
                    if not seen_before:
                        # flag it as new within JSON
                        message_json[field_flag] = True
                    else:
                        message_json[field_flag] = False
        return message_json
def scan(self, payload, **kwargs):
    """Generator variant of the SMTP session scanner.

    Yields one JSON-able dict per carved e-mail session: lower-cased headers
    (newline-joined on collision), optional bodies, extracted IP/URL IOCs,
    unpacked winmail.dat (TNEF) attachments, and bloom-filter 'isnew' flags.
    Returns False for empty payloads and True for Vortex client sessions
    (which carry no useful data).
    """
    if not payload:
        self.log.warn(
            "SMTP session is empty. Do you have permission to the source?")
        return False
    extracted_urls = None
    extracted_ips = None
    # Grab the uuid of so we can pass it off to the attachment
    # NOTE(review): self.stoq.get_uuid is not called here — if it is a
    # method rather than a property, this stores the bound method in a
    # list. Confirm against the stoq framework.
    uuid = kwargs.get('uuid', [self.stoq.get_uuid])
    # Get the appropriate metadata from the vortex filename
    vortex_meta = self.vortex_metadata(kwargs['filename'])
    # If vortex_meta returns False, it means the payload being analyzed is
    # the client session, which contains useless information. Let's just
    # skip it.
    if vortex_meta is False:
        self.log.debug("Vortex client sessions provided, skipping...")
        return True
    # Iterate over each e-mail session
    for email_session in self.carve_email(payload):
        email_session = self.stoq.force_unicode(email_session)
        message_json = {}
        message = pyzmail.message_from_string(email_session)
        if vortex_meta:
            # Setup our primary message json blob
            # .copy() keeps the shared vortex metadata pristine across
            # loop iterations.
            message_json = vortex_meta.copy()
            message_json['vortex_filename'] = kwargs['filename']
        # Create a dict of the headers in the session
        for k, v in list(message.items()):
            curr_header = k.lower()
            if curr_header in message_json:
                # If the header key already exists, let's join them
                message_json[curr_header] += "\n{}".format(
                    message.get_decoded_header(k))
            else:
                message_json[curr_header] = message.get_decoded_header(k)
        # Extract the e-mail body, to include HTML if available
        if message.text_part is not None:
            message_json['body'] = self.stoq.force_unicode(
                message.text_part.get_payload())
        else:
            message_json['body'] = ""
        if message.html_part is not None:
            message_json['body_html'] = self.stoq.force_unicode(
                message.html_part.get_payload())
        else:
            message_json['body_html'] = ""
        # Make this easy, merge both text and html body within e-mail
        # for the purpose of extracting any URIs
        email_body = "{}{}".format(message_json['body'],
                                   message_json['body_html'])
        # Extract and normalize any IP addresses in headers
        if self.extract_iocs:
            # str of concatenated ip_headers
            concat_ips = ""
            # Define which headers we want to extract IP addresses from
            ip_headers = [
                'src_ip',
                'dest_ip',
                'received',
                'x-orig-ip',
                'x-originating-ip',
                'x-remote-ip',
                'x-sender-ip'
            ]
            # concat all of our headers into one string for easy searching
            for ip_header in ip_headers:
                if ip_header in message_json:
                    concat_ips += message_json[ip_header]
            extracted_ips = self.readers['iocregex'].read(
                concat_ips, datatype_flag='ipv4')
            # Let's get a unique list of IP addresses from extracted data
            if 'ipv4' in extracted_ips:
                message_json['ips'] = extracted_ips['ipv4']
            # extract and normalize any URLs found
            extracted_urls = self.readers['iocregex'].read(
                email_body, datatype_flag='url')
            # Extract any URLs that may be in the merged body
            if 'url' in extracted_urls:
                message_json['urls'] = extracted_urls['url']
        # Handle attachments
        message_json['att'] = []
        for mailpart in message.mailparts:
            try:
                filename = mailpart.filename
            except TypeError:
                filename = "None"
            # This is a check for winmail.dat files. If successful,
            # skip_attachment will be True and we will use the
            # results from that instead of winmail.dat file itself.
            skip_attachment = False
            if mailpart.type == "text/plain":
                # Fold text/plain parts into the body rather than treating
                # them as attachments; decode errors are deliberately
                # best-effort (swallowed).
                try:
                    message_json['body'] += self.stoq.force_unicode(
                        mailpart.get_payload())
                except:
                    pass
                skip_attachment = True
            else:
                if filename == "winmail.dat":
                    tnef_results = TNEF(mailpart.get_payload())
                    # we have data, let's handle it.
                    if tnef_results.attachments:
                        # We have a valid file within winmail.dat,
                        # let's make sure we only handle it here.
                        skip_attachment = True
                        for tnef_attachment in tnef_results.attachments:
                            try:
                                filename = self.stoq.force_unicode(
                                    tnef_attachment.name)
                            except:
                                filename = "None"
                            # NOTE(review): uuid=message_json['uuid'] may
                            # KeyError if no 'uuid' header/meta was parsed;
                            # the bare except silently drops the attachment
                            # in that case — confirm this is intended.
                            try:
                                attachment_json = self.handle_attachments(
                                    payload=tnef_attachment.data,
                                    filename=filename,
                                    uuid=message_json['uuid'])
                                if attachment_json:
                                    message_json['att'].append(
                                        attachment_json)
                            except:
                                pass
            # Let's handle the attachment normally
            if not skip_attachment:
                attachment_json = self.handle_attachments(
                    payload=mailpart.get_payload(),
                    filename=filename,
                    uuid=uuid)
                if attachment_json:
                    attachment_json['desc'] = mailpart.part.get(
                        'Content-Description')
                    attachment_json['type'] = mailpart.type
                    message_json['att'].append(attachment_json)
        if self.use_bloom:
            # Check bloom filters
            for field_name, field_bloom in self.bloomfilters.items():
                # If the configured field name exists in parsed data...
                if field_name in message_json:
                    # extract the field value and check if it has been seen
                    # before...
                    field_value = message_json[field_name]
                    seen_before = field_bloom.query_filter(
                        field_value, add_missing=True)
                    # Generate JSON entry key for flagging new field values
                    field_flag = "{}_isnew".format(field_name)
                    # if the value has not been seen before...
                    if not seen_before:
                        # flag it as new within JSON
                        message_json[field_flag] = True
                    else:
                        message_json[field_flag] = False
        # Make sure we delete the body and body_html keys if they are to
        # be omitted
        if self.omit_body:
            message_json.pop('body', None)
            message_json.pop('body_html', None)
        yield message_json