def handle(self, *args, **options):
    """Parse one mail, read from a file or from stdin, and exit with a status.

    The path is the first positional argument when one is given, otherwise
    ``options['infile']``; a falsy path means "read stdin".  The process
    exits with 0 when ``parse_mail`` returns a truthy result and with 1
    otherwise.  An exception raised while parsing is logged together with
    the raw mail and is not re-raised.
    """
    infile = args[0] if args else options['infile']

    if not infile:
        logger.info('Parsing mail loaded from stdin')
        if six.PY3:
            # Python 3 needs the underlying byte stream of stdin.
            mail = email.message_from_binary_file(sys.stdin.buffer)
        else:
            mail = email.message_from_file(sys.stdin)
    else:
        logger.info('Parsing mail loaded by filename')
        if six.PY3:
            with open(infile, 'rb') as file_:
                mail = email.message_from_binary_file(file_)
        else:
            with open(infile) as file_:
                mail = email.message_from_file(file_)

    try:
        if not parse_mail(mail, options['list_id']):
            logger.warning('Failed to parse mail')
            sys.exit(1)
        # SystemExit is not an Exception subclass, so it passes through
        # the handler below.
        sys.exit(0)
    except Exception:
        logger.exception('Error when parsing incoming email',
                         extra={'mail': mail.as_string()})
def handle(self, *args, **options):
    """Parse one mail from a file or stdin and store it via ``parse_mail``.

    The path is the first positional argument when one is given, otherwise
    ``options['infile']``; a falsy path means "read stdin".  An
    ``AttributeError`` while loading is logged as a broken mail and the
    method returns.  Any non-duplicate error from ``parse_mail`` is logged
    and the process exits with status 1.
    """
    infile = args[0] if args else options['infile']

    try:
        if not infile:
            logger.info('Parsing mail loaded from stdin')
            mail = email.message_from_binary_file(sys.stdin.buffer)
        else:
            logger.info('Parsing mail loaded by filename')
            with open(infile, 'rb') as file_:
                mail = email.message_from_binary_file(file_)
    except AttributeError:
        logger.warning("Broken email ignored")
        return

    # it's important to get exit codes correct here. The key is to allow
    # proper separation of real errors vs expected 'failures'.
    #
    # patch/comment parsed:        0
    # no parseable content found:  0
    # duplicate messages:          0
    # db integrity/other db error: 1
    # broken email (ValueError):   1 (this could be noisy, if it's an issue
    #                                we could use a different return code)
    try:
        if parse_mail(mail, options['list_id']) is None:
            logger.warning('Nothing added to database')
    except DuplicateMailError as exc:
        logger.warning('Duplicate mail for message ID %s', exc.msgid)
    except Exception as exc:
        # ValueError is an Exception subclass, so this one clause covers
        # both of the cases listed above.
        logger.exception('Error when parsing incoming email: %s',
                         repr(exc),
                         extra={'mail': mail.as_string()})
        sys.exit(1)
def test_write_mime_extra_headers(self):
    '''write_mime() with extra headers.'''
    pr = problem_report.ProblemReport(date='now!')
    pr['Simple'] = 'bar'
    pr['TwoLine'] = 'first\nsecond\n'

    # Named "out" rather than "io" so the stdlib module name is not shadowed.
    out = BytesIO()
    pr.write_mime(out, extra_headers={'Greeting': 'hello world',
                                      'Foo': 'Bar'})
    out.seek(0)

    msg = email.message_from_binary_file(out)
    self.assertEqual(msg['Greeting'], 'hello world')
    self.assertEqual(msg['Foo'], 'Bar')
    parts = list(msg.walk())
    self.assertEqual(len(parts), 2)

    # first part is the multipart container
    self.assertTrue(parts[0].is_multipart())

    # second part should be an inline text/plain attachment with all short
    # fields
    self.assertFalse(parts[1].is_multipart())
    self.assertEqual(parts[1].get_content_type(), 'text/plain')
    # assertIn reports both operands on failure, unlike assertTrue(x in y).
    self.assertIn(b'Simple: bar', parts[1].get_payload(decode=True))
def _process(self, ev):
    """Claim a spooled file, parse it and hand it to ``dispatch``.

    Files whose name starts with ``_`` are ignored.  Any other file is
    first moved to ``self.spool_dir + "_" + ev.name``; if the move fails
    the event is dropped.  The raw bytes are then dispatched together with
    the ``From`` header and the parsed ``To`` addresses.  Failures are
    logged, never raised.
    """
    # change name of the file asap, to minimize the probability of racing conditions
    # when processing across multiple instances of the app
    if ev.name.startswith("_"):
        return
    fn = self.spool_dir + "_" + ev.name
    try:
        shutil.move(ev.pathname, fn)
    except Exception as e:
        log.warning(">>>> possible MM4 file watcher racing condition: " + str(e))
        return

    # parse the file content to get the from and to addresses
    try:
        # Read the bytes once and parse from memory: parsing straight from
        # the handle consumes it, so a later fh.read() would return b''.
        with open(fn, "rb") as fh:
            raw = fh.read()
        msg = email.message_from_bytes(raw)
        dispatch(raw,
                 msg.get('from'),
                 email.utils.getaddresses(msg.get_all('to')))
    except email.errors.MessageParseError as me:
        # Report the file actually processed (was an undefined name).
        log.warning(">>>> MM4 file watcher failed to parse {}: {}"
                    .format(fn, me))
    except Exception as e:
        log.debug(traceback.format_exc())
        log.warning(">>>> MM4 file watcher failed: {}".format(e))
def setFromDraft(self, draft_id):
    """Load the stored draft ``draft_id`` into the compose widgets.

    The message file is located through the database, parsed with the
    default email policy and kept as ``self.msg``.  Subject, recipients,
    the plain-text body (when there is one) and attachment names are then
    copied into the widgets.
    """
    with open_db() as db:
        msg = db.find_message(draft_id)
        with open(msg.get_filename(), 'rb') as fp:
            pymessage = email.message_from_binary_file(
                fp, policy=email.policy.default)

    self.msg = pymessage
    self.draft_id = draft_id

    self.subjectEdit.setText(self.msg['Subject'])
    self.rcptEdit.set_recipients(
        to=getaddresses(self.msg.get_all('To', [])),
        cc=getaddresses(self.msg.get_all('Cc', [])))

    # get_body() returns None when the message has no text/plain part;
    # only touch the editor when there is real text to show.
    body = pymessage.get_body(('plain', ))
    if body is not None:
        self.messageEdit.setPlainText(body.get_content())

    for attachment in pymessage.iter_attachments():
        self.attachmentsList.addItem(attachment.get_filename()
                                     or 'untitled.attachment')
    # NOTE(review): items go into ``attachmentsList`` but ``attachmentList``
    # is shown -- confirm both attributes exist and this is not a typo.
    self.attachmentList.show()
def setReply(self, reply_to, reply_all):
    """Prepare the compose widgets for a reply to message ``reply_to``.

    Threading headers, subject and recipients come from
    ``self._get_reply_from_cli``.  The original message is then loaded
    from disk and, when it has a text/plain part, that text is parsed,
    passed block by block through ``indent_recursive`` and placed in the
    editor.
    """
    assert reply_to

    info = self._get_reply_from_cli(reply_to, reply_all)
    for header in ('In-reply-to', 'References'):
        if header in info:
            self.msg[header] = info[header]

    self.subjectEdit.setText(info['Subject'])
    self.rcptEdit.set_recipients(to=getaddresses([info.get('To', '')]),
                                 cc=getaddresses([info.get('Cc', '')]))

    with open_db() as db:
        msg = db.find_message(reply_to)
        with open(msg.get_filename(), 'rb') as fp:
            pymessage = email.message_from_binary_file(
                fp, policy=email.policy.default)

    body = pymessage.get_body(('plain', ))
    if body is None:
        # No text/plain part to quote; handing None to the parser would fail.
        return

    parsed = Parser().parse(body.get_content())
    for block in parsed:
        indent_recursive(block)
    self.messageEdit.setPlainText(to_text(parsed))
def test_actual_emails(eml: Path, monkeypatch: pytest.MonkeyPatch) -> None:
    """Compare ``email2dict`` output for ``eml`` with its expectation module.

    The expected values live in a module named after the file's stem;
    ``EMAIL_DIR`` is prepended to ``sys.path`` so it can be imported.
    """
    with eml.open("rb") as handle:
        parsed = email.message_from_binary_file(handle, policy=policy.default)

    monkeypatch.syspath_prepend(EMAIL_DIR)
    expected: Any = import_module(eml.stem)

    assert email2dict(parsed) == expected.data
    assert email2dict(parsed, include_all=True) == expected.data_all
def read_email(email_path):
    """Read one email file into a flat dict.

    The result holds the raw text (``original_msg``), the parsed payload
    (``payload``), the output of ``parse_html_payload`` (``text``) and one
    entry per header.  ``Date`` is normalised to ISO format when it can be
    parsed.  Header and date problems are logged, not raised.
    """
    detector = UniversalDetector()
    result = {}

    with open(email_path, 'rb') as fp:
        msg = email.message_from_binary_file(fp, policy=default)

    try:
        with open(email_path) as f:
            original = f.read()
    except UnicodeDecodeError:
        # The platform default encoding failed: sniff the encoding from
        # the raw bytes, stopping as soon as the detector is confident.
        detector.reset()
        with open(email_path, 'rb') as f:
            for line in f:
                detector.feed(line)
                if detector.done:
                    break
        detector.close()
        encoding = detector.result['encoding']
        # errors='replace' keeps a wrong or missing guess from raising
        # a second, unhandled decode error.
        with open(email_path, encoding=encoding, errors='replace') as f:
            original = f.read()

    result['original_msg'] = original
    result['payload'] = msg.get_payload()
    result['text'] = parse_html_payload(result['payload'])

    try:
        for key, value in msg.items():
            result[key] = value
    except Exception as e:
        logger.error('Problem parsing email: {}\n{}'.format(email_path, e))

    try:
        result['Date'] = dateparser.parse(result['Date']).isoformat()
    except Exception as e:
        # Report the same key that was just read ('Date', not 'date').
        logger.error('Problem converting date: {}\n{}'.format(
            result.get('Date'), e))

    return result
def parse_mail(fileobj):
    """Parse a binary mail file into cleaned headers and body lines.

    Returns ``(headers, body)``.  ``headers`` maps each header name to its
    value after ``quoted_printable`` with newlines removed, tabs turned
    into spaces and surrounding whitespace stripped.  ``body`` is the list
    of body lines with soft line breaks merged and a trailing ``=20``
    turned into a space.
    """
    msg = email.message_from_binary_file(fileobj)

    headers = {
        name: quoted_printable(value).replace('\n', '').replace('\t', ' ').strip()
        for name, value in msg.items()
    }
    body = [text.replace("\n", "")
            for text in email.iterators.body_line_iterator(msg)]

    # Merge lines joined with "=\n".  Walk from the second-to-last line
    # back to the first so deleting the following line never disturbs an
    # index that is still to be visited.
    for pos in range(len(body) - 2, -1, -1):
        current = body[pos]
        if len(current) == 74 and current.endswith('='):
            body[pos] = current[:-1] + body[pos + 1]
            del body[pos + 1]

    # Remove =20 from end of lines
    for pos, text in enumerate(body):
        if text.endswith('=20'):
            body[pos] = text[:-3] + ' '

    return headers, body
def eom(self):
    """Handle end-of-message: rewind the buffer, parse it, add a recipient.

    Always returns ``Milter.ACCEPT``.
    """
    buffered = self.fp
    buffered.seek(0)
    # The parsed message is not used any further in this method.
    msg = email.message_from_binary_file(buffered)

    # many milter functions can only be called from eom()
    # example of adding a Bcc:
    recipient = '<%s>' % '*****@*****.**'
    self.addrcpt(recipient)
    return Milter.ACCEPT
def pyatl():
    """Handle a notification posted in the request body.

    A body containing ``SubscribeURL`` is confirmed by fetching that URL.
    Otherwise the body's ``Message`` names an S3 bucket and key; that
    object is downloaded to a temporary file, parsed as an email, and the
    decoded payload of every MIME part that has a filename is printed.
    Returns an empty string in both cases.
    """
    data = json.loads(request.data)

    if 'SubscribeURL' in data:
        url = data['SubscribeURL']
        print('Subscribing to topic at {}'.format(url))
        requests.get(url)
        return ''

    message = json.loads(data['Message'])
    bucket_name = message['receipt']['action']['bucketName']
    object_key = message['receipt']['action']['objectKey']
    sender = message['mail']['source']
    url = 's3://{}/{}'.format(bucket_name, object_key)
    print('new message from {} at {}'.format(sender, url))

    with tempfile.NamedTemporaryFile() as t:
        print('downloading message to {}'.format(t.name))
        s3 = boto3.resource('s3')
        try:
            s3.Bucket(bucket_name).download_file(object_key, t.name)
        except botocore.exceptions.ClientError as e:
            if e.response['Error']['Code'] != "404":
                raise
            print("The object does not exist.")

        # Re-open by name while the temporary file still exists.
        with open(t.name, 'rb') as f:
            m = email.message_from_binary_file(f, policy=default)

        for p in m.walk():
            filename = p.get_filename()
            if filename:
                print('found mime part {}'.format(filename))
                payload = p.get_payload(decode=True)
                print(payload.decode())
    return ''
def test_write_mime_order(self):
    '''write_mime() with keys ordered.'''
    pr = problem_report.ProblemReport(date='now!')
    pr['SecondText'] = 'What'
    pr['FirstText'] = 'Who'
    pr['FourthText'] = 'Today'
    pr['ThirdText'] = "I Don't Know"

    # Named "out" rather than "io" so the stdlib module name is not shadowed.
    out = BytesIO()
    pr.write_mime(out, priority_fields=['FirstText', 'SecondText',
                                        'ThirdText', 'Unknown',
                                        'FourthText'])
    out.seek(0)

    msg = email.message_from_binary_file(out)
    parts = list(msg.walk())
    self.assertEqual(len(parts), 2)

    # first part is the multipart container
    self.assertTrue(parts[0].is_multipart())

    # second part should be an inline text/plain attachment with all short
    # fields
    self.assertFalse(parts[1].is_multipart())
    self.assertEqual(parts[1].get_content_type(), 'text/plain')
    self.assertEqual(parts[1].get_content_charset(), 'utf-8')
    self.assertIsNone(parts[1].get_filename())
    # One "Key: value" per line, priority fields first in the given order.
    self.assertEqual(parts[1].get_payload(decode=True),
                     b'FirstText: Who\n'
                     b'SecondText: What\n'
                     b"ThirdText: I Don't Know\n"
                     b'FourthText: Today\n'
                     b'ProblemType: Crash\n'
                     b'Date: now!\n')
def test_prepare_metadata_for_editable(tmp_path: Path) -> None:
    """Editable metadata must name the dist-info dir and require ``editables``."""
    with build_fixture_project("demo-package"):
        dist_info = api.prepare_metadata_for_build_editable(tmp_path.as_posix())
        assert dist_info == "demo_package-0.1.0.dist-info"

        metadata_file = tmp_path / dist_info / "METADATA"
        with metadata_file.open("rb") as metadata:
            parsed = email.message_from_binary_file(metadata)
            deps = parsed.get_all("Requires-Dist")
        assert "editables" in deps
def test_file_sessions(self):
    """Make sure opening a connection creates a new file"""
    msg = EmailMessage(
        'Subject', 'Content', '*****@*****.**', ['*****@*****.**'],
        headers={'From': '*****@*****.**'},
    )
    connection = mail.get_connection()
    connection.send_messages([msg])

    self.assertEqual(len(os.listdir(self.tmp_dir)), 1)
    first_file = os.path.join(self.tmp_dir, os.listdir(self.tmp_dir)[0])
    with open(first_file, 'rb') as fp:
        message = message_from_binary_file(fp)
    self.assertEqual(message.get_content_type(), 'text/plain')
    self.assertEqual(message.get('subject'), 'Subject')
    self.assertEqual(message.get('from'), '*****@*****.**')
    self.assertEqual(message.get('to'), '*****@*****.**')

    # A second connection writes to a file of its own ...
    connection2 = mail.get_connection()
    connection2.send_messages([msg])
    self.assertEqual(len(os.listdir(self.tmp_dir)), 2)

    # ... while the first connection does not add another file.
    connection.send_messages([msg])
    self.assertEqual(len(os.listdir(self.tmp_dir)), 2)

    msg.connection = mail.get_connection()
    self.assertTrue(connection.open())
    msg.send()
    self.assertEqual(len(os.listdir(self.tmp_dir)), 3)
    msg.send()
    self.assertEqual(len(os.listdir(self.tmp_dir)), 3)

    connection.close()
def parse_from_file(path):
    """Parse a single message from a file and delete the file afterwards.

    :param path: The full path to the email file.
    :type path: str
    :return dict A dictionary with all the necessary data, or None when
        the file is not accessible for both reading and writing.
    """
    # Although we don't write anything into the file, we need to make
    # sure we can remove it.
    if not os.access(path, os.R_OK | os.W_OK):
        log.warning("Cannot access in 'rw' mode the file at '%s'", path)
        return None

    with io.open(path, mode="rb") as read_file:
        mail = email.message_from_binary_file(read_file)
    data = extract_mail_values(mail)

    try:
        os.unlink(path)
    except PermissionError:
        log.error("Error removing file at '%s'", path)

    return data
def _process_multipart_body(self, mimeinput, charset):
    """Read one multipart/form-data part and add it to ``self.form``.

    The header lines of the part (up to and including the blank line) are
    parsed as a MIME header block.  A part with a ``filename`` is received
    into an ``Upload``; any other part is decoded to a string with the
    part's charset, the ``charset`` argument or ``self.charset``.

    Raises ``RequestError`` when the Content-Disposition header is missing
    or is not ``form-data`` with a ``name`` parameter.
    """
    # NOTE(review): ``lines`` is assumed to be a one-shot iterator, so the
    # loop below consumes the headers and leaves the body for later.
    lines = mimeinput.readpart()
    header_buf = io.BytesIO()
    for line in lines:
        header_buf.write(line)
        if line == b'\r\n':
            break
    header_buf.seek(0)
    headers = email.message_from_binary_file(header_buf)

    ctype, ctype_params = parse_header(headers.get('content-type', ''))
    if ctype and 'charset' in ctype_params:
        charset = ctype_params['charset']

    cdisp, cdisp_params = parse_header(
        headers.get('content-disposition', ''))
    if not cdisp:
        raise RequestError('expected Content-Disposition header')
    name = cdisp_params.get('name')
    filename = cdisp_params.get('filename')
    if not (cdisp == 'form-data' and name):
        # The two literals are concatenated, so the first needs a trailing
        # space (it used to read "form-datawith").
        raise RequestError('expected Content-Disposition: form-data '
                           'with a "name" parameter: got %r' %
                           headers.get('content-disposition', ''))

    # FIXME: should really to handle Content-Transfer-Encoding and other
    # MIME complexity here.  See RFC2048 for the full horror story.
    if filename:
        # it might be large file upload so use a temporary file
        upload = Upload(filename, ctype, charset)
        upload.receive(lines)
        _add_field_value(self.form, name, upload)
    else:
        value = _decode_string(b''.join(lines), charset or self.charset)
        _add_field_value(self.form, name, value)
def get_time(path):
    """Return ``time_from_date`` of the ``HDR_DATE`` header of the mail at ``path``.

    Raises ``BadTime`` when that header is missing or empty.
    """
    with open(path, 'rb') as source:
        parsed = message_from_binary_file(source)

    date_value = parsed[HDR_DATE]
    if not date_value:
        raise BadTime()
    return time_from_date(date_value)
def handle(self, *args, **options):
    """Report, and optionally delete, incoming letters with a repeated Message-ID.

    For every case of the monitoring ``options["monitoring_pk"]`` the
    incoming letters are scanned.  The first letter seen with a given
    Message-ID is kept; later ones are reported and deleted when
    ``options["delete"]`` is truthy.  Letters without an eml file or
    without a Message-ID are skipped.
    """
    monitoring = Monitoring.objects.get(pk=options["monitoring_pk"])
    for case in monitoring.case_set.all():
        ids = set()
        incoming = Letter.objects.filter(
            record__case=case.pk).is_incoming().all()
        for letter in incoming:
            self.stdout.write("Processing letter: {}".format(letter.pk))
            if not letter.eml:
                self.stdout.write(
                    "Skipping {} due missing eml.".format(letter.pk))
                continue

            content = letter.eml.file.read()
            fp = BytesIO(content)
            # Content without a literal "Subject:" is treated as gzipped.
            if b"Subject:" not in content:
                fp = gzip.GzipFile(fileobj=fp)
            msg_id = email.message_from_binary_file(fp).get("Message-ID")

            if not msg_id:
                self.stdout.write(
                    "Skipping {} due missing 'Message-ID'.".format(
                        letter.pk))
            elif msg_id in ids:
                self.stdout.write(
                    "Removing {} due duplicated 'Message-ID': {}".format(
                        letter.pk, msg_id))
                if options["delete"]:
                    letter.delete()
            else:
                self.stdout.write(
                    "Skipping {} due unique 'Message-ID': {}".format(
                        letter.pk, msg_id))
                ids.add(msg_id)
def large_email():
    """Yield a parsed large email that carries several image attachments."""
    sample = TESTS_HERE / "data/1604232551.M582355P21675Q3.hubhero-demo"
    with open(sample, "rb") as handle:
        parsed = email.message_from_binary_file(handle, policy=default)
    yield parsed
def parse_mail(file_in):
    """Extract the subject and body of a mail file.

    The headers must be formatted as a block of RFC 2822 style.
    Multipart messages are not supported: ``get_payload(decode=True)``
    returns None for them.

    input: file path
    output: dict with ``body``, ``subject``, ``encoding`` and ``name``
        keys, passed through ``dc_remove_adresses`` and ``dc_remove_url``
    """
    # We open the file and then divide it in different parts.
    with open(file_in, 'rb') as infile:
        raw_mail = email.message_from_binary_file(infile)

    # get_charsets() yields None for a message that declares no charset;
    # bytes.decode(None) raises TypeError, so fall back to UTF-8.
    charset = raw_mail.get_charsets()[0] or 'utf-8'
    # A missing Subject header is treated as an empty subject rather than
    # being passed to decode_header() as None.
    subject = raw_mail["Subject"] or ''

    formated_mail = {
        "body": raw_mail.get_payload(decode=True).decode(charset),
        "subject": str(email.header.make_header(
            email.header.decode_header(subject))),
        "encoding": raw_mail['content-type'],
    }

    # name = "<parent directory>-<file name without extension>"
    date = os.path.dirname(file_in).split('/').pop() + '-'
    name = os.path.splitext(os.path.basename(file_in))[0]
    formated_mail['name'] = date + name

    formated_mail = dc_remove_adresses(formated_mail)
    formated_mail = dc_remove_url(formated_mail)
    return formated_mail
def test_bogus_content_charset(self):
    """A mail with a bogus content charset must flatten back to its source text."""
    with path('mailman.email.tests.data', 'bad_email_3.eml') as email_path:
        with open(str(email_path), 'rb') as handle:
            parsed = message_from_binary_file(handle, Message)
            # Rewind to read the exact bytes the parser was given.
            handle.seek(0)
            raw = handle.read()
    expected = raw.decode('ascii', 'replace')
    self.assertEqual(parsed.as_string(), expected)
def parse_blob_info(field_storage):
    """Parse a BlobInfo record from file upload field_storage.

    Args:
      field_storage: cgi.FieldStorage that represents uploaded blob.

    Returns:
      BlobInfo record as parsed from the field-storage instance.
      None if there was no field_storage.

    Raises:
      BlobInfoParseError when provided field_storage does not contain enough
      information to construct a BlobInfo object.
    """
    if field_storage is None:
        return None

    field_name = field_storage.name

    def get_value(dct, name):
        """Return ``dct[name]`` or raise BlobInfoParseError when absent."""
        value = dct.get(name, None)
        if value is None:
            raise BlobInfoParseError('Field %s has no %s.' % (field_name, name))
        return value

    filename = get_value(field_storage.disposition_options, 'filename')
    blob_key_str = get_value(field_storage.type_options, 'blob-key')
    blob_key = BlobKey(blob_key_str)

    # The upload body is itself a small header block describing the blob.
    if not six.PY2:
        upload_content = email.message_from_binary_file(field_storage.file)
    else:
        upload_content = email.message_from_file(field_storage.file)

    content_type = get_value(upload_content, 'content-type')
    size = get_value(upload_content, 'content-length')
    creation_string = get_value(upload_content, UPLOAD_INFO_CREATION_HEADER)
    md5_hash_encoded = get_value(upload_content, 'content-md5')
    md5_hash = base64.urlsafe_b64decode(md5_hash_encoded)

    try:
        size = int(size)
    except (TypeError, ValueError):
        raise BlobInfoParseError('%s is not a valid value for %s size.' %
                                 (size, field_name))

    try:
        creation = blobstore._parse_creation(creation_string, field_name)
    except blobstore._CreationFormatError as err:
        raise BlobInfoParseError(str(err))

    return BlobInfo(
        id=blob_key_str,
        content_type=content_type,
        creation=creation,
        filename=filename,
        size=size,
        md5_hash=md5_hash,
    )
def timerEvent(self, ev):
    """Build and store the excerpt of the next queued message.

    Events from other timers go to the base class.  One
    ``(message_id, filename)`` pair is taken from the queue per tick, and
    the timer is stopped once the queue is empty.  The excerpt is the
    message text with whitespace runs collapsed, cut to 100 characters;
    it is stored as a message property and announced through
    ``builtExcerpt``.
    """
    if ev.timerId() != self.timer.timerId():
        super(ExcerptBuilder, self).timerEvent(ev)
        return

    message_id, filename = self.queue.popleft()
    if not self.queue:
        self.timer.stop()

    if not hasattr(notmuch.Message, 'add_property'):
        # FIXME remove condition when function is integrated in notmuch bindings
        return

    with open(filename, 'rb') as fp:
        pymessage = email.message_from_binary_file(
            fp, policy=email.policy.default)

    text = self._getExcerptPlainText(pymessage)
    if text is None:
        if not HAS_HTML2TEXT:
            return
        text = self._getExcerptHtml(pymessage)

    # Any falsy result is treated as an empty excerpt.
    excerpt = re.sub(r'\s+', ' ', text or '')[:100]

    with open_db_rw() as db:
        message = db.find_message(message_id)
        message.add_property(self.PROPERTY, excerpt)

    self.builtExcerpt.emit(message_id, excerpt)
def test_write_mime_extra_headers(self):
    '''write_mime() with extra headers.'''
    pr = problem_report.ProblemReport(date='now!')
    pr['Simple'] = 'bar'
    pr['TwoLine'] = 'first\nsecond\n'

    # Use a name that does not hide the stdlib "io" module.
    buf = BytesIO()
    pr.write_mime(buf, extra_headers={
        'Greeting': 'hello world',
        'Foo': 'Bar',
    })
    buf.seek(0)

    msg = email.message_from_binary_file(buf)
    self.assertEqual(msg['Greeting'], 'hello world')
    self.assertEqual(msg['Foo'], 'Bar')
    parts = list(msg.walk())
    self.assertEqual(len(parts), 2)

    # first part is the multipart container
    self.assertTrue(parts[0].is_multipart())

    # second part should be an inline text/plain attachment with all short
    # fields
    self.assertFalse(parts[1].is_multipart())
    self.assertEqual(parts[1].get_content_type(), 'text/plain')
    # assertIn shows the needle and the haystack when it fails.
    self.assertIn(b'Simple: bar', parts[1].get_payload(decode=True))
def test_simple_utf8_file(self):
    """The body of a simple UTF-8 mail is extracted as decoded text."""
    # Open the fixture in a context manager so the handle is closed
    # instead of being left to the garbage collector.
    with open('tests/static/mail/utf8.eml', 'rb') as fixture:
        mail = email.message_from_binary_file(
            fixture, _class=email.message.EmailMessage)
    actual = utils.extract_body(mail)
    expected = "Liebe Grüße!\n"
    self.assertEqual(actual, expected)
def message2email(msg: Message) -> EmailMessage:
    """
    Turn an old-style `Message` (or a subclass such as a `mailbox` message
    class) into a new-style `EmailMessage` that uses the ``default``
    policy.  An argument that already is an `EmailMessage` is returned
    as-is.
    """
    if isinstance(msg, EmailMessage):
        return msg

    # Message.as_bytes() refolds long header lines (which can result in
    # changes in whitespace after reparsing) and doesn't give a way to
    # change this, so the message is flattened with an explicit
    # BytesGenerator instead.
    # TODO: Instead of maxheaderlen, use a policy with refold_source=None?
    buffer = BytesIO()
    keep_unixfrom = msg.get_unixfrom() is not None
    BytesGenerator(buffer, mangle_from_=False, maxheaderlen=0).flatten(
        msg, unixfrom=keep_unixfrom
    )
    buffer.seek(0)

    converted = email.message_from_binary_file(buffer, policy=policy.default)
    assert isinstance(converted, EmailMessage)

    # MMDFMessage and mboxMessage expose their "From " line through
    # get_from() rather than get_unixfrom(), so it is copied over by hand.
    if isinstance(msg, (MMDFMessage, mboxMessage)):
        converted.set_unixfrom("From " + msg.get_from())
    return converted
def decode_email(eml_file: str,
                 include_raw_body: bool = False,
                 include_attachment_data: bool = False,
                 pconf: typing.Optional[dict] = None,
                 policy: email.policy.Policy = email.policy.default) -> dict:
    """Decode an EML file into an easily parsable structure.

    The file is read in binary mode, parsed with ``policy`` and the
    resulting message is handed to ``parse_email``, whose result is
    returned.

    Args:
        eml_file (str): Full absolute path to the file to be parsed.
        include_raw_body (bool, optional): Whether to include the original
            file contents in the returned structure. Default is False.
        include_attachment_data (bool, optional): Whether to include raw
            attachment data in the returned structure. Default is False.
        pconf (dict, optional): A dict with various optional configuration
            parameters, e.g. whitelist IPs, whitelist e-mail addresses, etc.
        policy (email.policy.Policy, optional): Policy to use when parsing
            e-mails. Default = email.policy.default.

    Returns:
        dict: A dictionary with the content of the EML parsed and broken
        down into key-value pairs.
    """
    with open(eml_file, 'rb') as handle:
        parsed = email.message_from_binary_file(handle, policy=policy)

    return parse_email(parsed, include_raw_body, include_attachment_data, pconf)
def __init__(self, filepath, category=None):
    """Parse the mail at ``filepath`` and remember its category.

    ``filepath`` may be a filesystem path or an already opened file
    object.  A path is opened here (binary mode on Python 3, text mode on
    Python 2) and closed again after parsing; a file object is parsed
    as-is and left open for the caller.
    """
    self.filepath = filepath
    self.category = category

    is_py3 = sys.version_info > (3, 0)
    parse = email.message_from_binary_file if is_py3 else email.message_from_file

    if hasattr(filepath, 'read'):
        # Already a file object: keep accepting it as before.
        self.mail = parse(filepath)
    else:
        # The parsers need a file object, not a path, so open it first.
        with open(filepath, 'rb' if is_py3 else 'r') as handle:
            self.mail = parse(handle)
def unsubscribe_message():
    """ Pytest fixture: the parsed unsubscribe email from the test data. """
    with open("./test_data/unsubscribe_email.eml", "rb") as handle:
        return email.message_from_binary_file(handle)
def message_with_code():
    """ Pytest fixture: the parsed ``code_email.eml`` test message. """
    with open("./test_data/code_email.eml", "rb") as handle:
        return email.message_from_binary_file(handle)
def message_with_one_attachment():
    """ Pytest fixture: the parsed ``pdf_one_email.eml`` test message. """
    with open("./test_data/pdf_one_email.eml", "rb") as handle:
        return email.message_from_binary_file(handle)
def from_file(fileobj):
    """Reads an email and cleans it up to make a MailBase.

    The file is parsed as text first.  If that raises ``TypeError`` the
    file is rewound and parsed again as a binary file.
    """
    try:
        parsed = email.message_from_file(fileobj)
    except TypeError:
        # Start over from the beginning with the binary parser.
        fileobj.seek(0)
        parsed = email.message_from_binary_file(fileobj)

    return from_message(parsed)
def load_mail(file_path):
    """Return the mail stored at ``file_path`` as a parsed message.

    Python 3 reads the file in binary mode; Python 2 reads it in text mode.
    """
    if not six.PY3:
        with open(file_path) as handle:
            return email.message_from_file(handle)

    with open(file_path, 'rb') as handle:
        return email.message_from_binary_file(handle)
def __init__(self, infile, category=None):
    """Parse the open mail file ``infile`` and remember its category.

    Python 3 uses the binary-file parser, Python 2 the text-file parser.
    """
    self.category = category

    on_python2 = not (sys.version_info > (3, 0))
    if on_python2:
        # Python 2 code in this block
        self.mail = email.message_from_file(infile)
    else:
        # Python 3 code in this block
        self.mail = email.message_from_binary_file(infile)
def parse_message_id(file_):
    '''Return the message id of the mail in the binary file ``file_``.

    The file is assumed to hold a valid RFC822 message.  Surrounding
    angle brackets are stripped; an empty string is returned when there
    is no Message-ID header.'''
    header = email.message_from_binary_file(file_).get("Message-ID")
    if header is None:
        return ""
    return header.strip("<>")
def message(self):
    """ Parse the mail stored at ``self.path`` and return the Message. """
    logger.debug("Parsing mail at {} ...".format(self.path))
    with open(self.path, 'rb') as mail_file:
        # Python 2 uses the text-file parser, Python 3 the binary one.
        if not PY2:
            return email.message_from_binary_file(mail_file)
        return email.message_from_file(mail_file)
def get_msgs_from_maildir(maildir, cachefile):
    """Parse every file in ``maildir``, cache the messages and return them.

    The list is stored through ``set_cache(cachefile, ...)`` before it is
    returned.
    """
    def _parse(name):
        # Read one maildir entry in binary mode and parse it.
        with open(os.path.join(maildir, name), 'rb') as handle:
            return email.message_from_binary_file(handle)

    # TODO: insort()
    mails = [_parse(entry) for entry in os.listdir(maildir)]
    set_cache(cachefile, mails)
    return mails
def email_html(file):
    """Return the quoted-printable-decoded body of the mail at ``file``.

    For a multipart message the raw payload of the first part is used;
    otherwise the transfer-decoded payload of the message itself.  The
    result is passed through ``quopri.decodestring`` and decoded with the
    default codec.
    """
    # A context manager closes the file; it used to be left open.
    with open(file, "rb") as infile:
        message = email.message_from_binary_file(infile)

    if message.is_multipart():
        # Only the first part is considered.
        body = message.get_payload()[0].get_payload()
    else:
        body = message.get_payload(decode=True)

    return quopri.decodestring(body).decode()
def message_from_binary_file(fp, *args, **kws):
    """
    Parse the contents of a binary file into a L{PyzMessage} object model.
    Extra arguments are forwarded to C{email.message_from_binary_file}.
    B{(Python >= 3.2)}

    @type fp: binary_file
    @param fp: the input file, must be open in binary mode
    @rtype: L{PyzMessage}
    @return: the L{PyzMessage} object
    """
    parsed = email.message_from_binary_file(fp, *args, **kws)
    return PyzMessage(parsed)
def handle(self, *args, **options):
    """Parse one mail from a file or stdin and store it via ``parse_mail``.

    The path is the first positional argument when one is given, otherwise
    ``options['infile']``; a falsy path means "read stdin".  An
    ``AttributeError`` while loading is logged as a broken mail and the
    method returns.  Any non-duplicate error from ``parse_mail`` is logged
    and the process exits with status 1.
    """
    infile = args[0] if args else options['infile']

    try:
        if infile:
            logger.info('Parsing mail loaded by filename')
            if six.PY3:
                with open(infile, 'rb') as file_:
                    mail = email.message_from_binary_file(file_)
            else:
                with open(infile) as file_:
                    mail = email.message_from_file(file_)
        else:
            logger.info('Parsing mail loaded from stdin')
            if six.PY3:
                mail = email.message_from_binary_file(sys.stdin.buffer)
            else:
                mail = email.message_from_file(sys.stdin)
    except AttributeError:
        logger.warning("Broken email ignored")
        return

    # it's important to get exit codes correct here. The key is to allow
    # proper separation of real errors vs expected 'failures'.
    #
    # patch/comment parsed:        0
    # no parseable content found:  0
    # duplicate messages:          0
    # db integrity/other db error: 1
    # broken email (ValueError):   1 (this could be noisy, if it's an issue
    #                                we could use a different return code)
    try:
        result = parse_mail(mail, options['list_id'])
        if result is None:
            logger.warning('Nothing added to database')
    except DuplicateMailError as exc:
        logger.warning('Duplicate mail for message ID %s', exc.msgid)
    except (ValueError, Exception) as exc:
        # Python 3 exceptions have no ``.message`` attribute; reading it
        # here raised AttributeError and hid the real error.  repr() works
        # on both Python 2 and 3.
        logger.exception('Error when parsing incoming email: %s',
                         repr(exc),
                         extra={'mail': mail.as_string()})
        sys.exit(1)
def test_file_with_attachement(self):
    """A mail with an attachment must be fetched back unchanged via RFC822."""
    here = os.path.dirname(os.path.realpath(__file__))
    fixture = os.path.join(here, 'data/test_attachment.eml')

    with open(fixture, mode='br') as msg:
        imap_client = yield from self.login_user('user@mail', 'pass',
                                                 select=True)
        mail = Mail(email.message_from_binary_file(msg))

        self.imapserver.receive(mail, imap_user='******')

        result, data = yield from imap_client.fetch('1', '(RFC822)')

        self.assertEqual('OK', result)
        expected = ['1 FETCH (RFC822 {418898}', mail.as_bytes(), ')',
                    'FETCH completed.']
        self.assertEqual(expected, data)
def _parse_email_fixture(self, fixture_path: str) -> Message:
    """Load an email fixture from ``fixture_path``.

    A path ending in ``.json`` goes through
    ``_parse_email_json_fixture``; anything else is parsed as a raw
    message file.  A missing fixture prints a notice and exits with
    status 1.
    """
    if not self._does_fixture_path_exist(fixture_path):
        print('Fixture {} does not exist'.format(fixture_path))
        exit(1)

    if fixture_path.endswith('.json'):
        return self._parse_email_json_fixture(fixture_path)

    with open(fixture_path, "rb") as handle:
        return email.message_from_binary_file(handle)
def import_package_news(self, package_directory_name):
    """Import the news files of one package directory as ``News`` rows.

    The package name is the directory's base name.  Each file in its
    ``news`` sub-directory is parsed as an email; its creation time comes
    from the ``Date`` header when there is one, otherwise the current
    time.  A file that fails to import is reported and skipped.  All
    collected rows are created in one bulk operation.
    """
    package_name = os.path.basename(package_directory_name)
    self.write('Processing package {pkg}...'.format(pkg=package_name))

    try:
        package = PackageName.objects.get(name=package_name)
    except PackageName.DoesNotExist:
        self.write('Package does not exist. Skipping messages...')
        return

    news_directory = os.path.join(package_directory_name, 'news')
    if not os.path.exists(news_directory):
        self.write("Package has no news directory.")
        return

    # Prefer the binary parser when this Python provides it.
    parse = getattr(email, 'message_from_binary_file',
                    email.message_from_file)

    email_news = []
    for news_file in sorted(os.listdir(news_directory)):
        news_file_path = os.path.join(news_directory, news_file)
        try:
            with open(news_file_path, 'rb') as f:
                msg = parse(f)

                if 'Date' not in msg:
                    date = timezone.now()
                else:
                    timestamp = email.utils.mktime_tz(
                        email.utils.parsedate_tz(msg['Date']))
                    date = datetime.utcfromtimestamp(timestamp)
                    date = timezone.make_aware(date, timezone.utc)

                news_kwargs = EmailNews.get_email_news_parameters(msg)
                content = news_kwargs.pop('file_content')
                news_kwargs['news_file'] = ContentFile(content,
                                                       name='news-file')

                email_news.append(News(
                    package=package,
                    datetime_created=date,
                    **news_kwargs))
        except Exception:
            import traceback
            traceback.print_exc()
            self.write('Problem importing news {}'.format(news_file_path))

    self.write("All news for the package processed. "
               "Bulk creating the instances.")
    News.objects.bulk_create(email_news)

    self.write('Complete.')
def get_message_parts(self):
    """Output like notmuch show

    Returns the list of all non-multipart parts of the message file, in
    the order ``walk()`` yields them.
    """
    # The context manager closes the file even when parsing raises.
    with open(self.get_filename(), 'rb') as fp:
        if sys.version_info[0] < 3:
            email_msg = email.message_from_file(fp)
        else:
            email_msg = email.message_from_binary_file(fp)

    return [part for part in email_msg.walk() if not part.is_multipart()]
def read_mail(filename, project=None):
    """Read a mail from a file in ``TEST_MAIL_DIR``.

    A ``Message-Id`` header is generated when the mail has none, and
    ``List-Id`` is set to ``project.listid`` when a project is given.
    """
    file_path = os.path.join(TEST_MAIL_DIR, filename)

    if not six.PY3:
        with open(file_path) as handle:
            mail = email.message_from_file(handle)
    else:
        with open(file_path, 'rb') as handle:
            mail = email.message_from_binary_file(handle)

    if 'Message-Id' not in mail:
        mail['Message-Id'] = make_msgid()
    if project:
        mail['List-Id'] = project.listid

    return mail
def mail(self):
    """Return the parsed email of ``self._msg``, parsing it on first use.

    The result is kept in ``self._mail`` so the file is read only once.
    """
    if self._mail is None:
        filename = self._msg.get_filename()
        if hasattr(email, 'message_from_binary_file'):
            # Close the file after parsing instead of leaking the handle.
            with open(filename, 'br') as fp:
                self._mail = email.message_from_binary_file(fp)
        else:
            if (3, 1) <= sys.version_info < (3, 2):
                fp = codecs.open(filename, 'r', 'utf-8', errors='replace')
            else:
                fp = open(filename)
            with fp:
                self._mail = email.message_from_file(fp)
    return self._mail
def m(increment: ('prev', 'show', 'next'), thing = None, *,
      showproc = None, showmimeproc = None,
      nocheckmime: bool = False, noheader: bool = False,
      draft: bool = False, maildir = None):
    '''
    Show messages

    :param increment: Previous, current, or next message
    :param thing: Folder (+folder) or message (message-id) to display,
        defaults current message in current folder
    :param showproc: Program to display text messages, in case you
        don't want to use the default
    :param showmimeproc: Program to display MIME messages, in case you
        don't want to use the default
    :param nocheckmime: Don't check for MIME messages
    :param noheader: Don't display the header
    :param draft: I don't understand this one.
    '''
    configuration = db.read_configuration()
    if not maildir:
        maildir = configuration['maildir']
    mhdir = db.MHDir(maildir)

    # A leading "+" selects a folder; anything else names a message.
    if thing:
        if thing.startswith('+'):
            mhdir.folder = thing[1:]
        else:
            mhdir.message = thing

    if not thing and not mhdir.has_current_folder():
        sys.stderr.write('''Set the current folder, like this. m show +INBOX ''')
        exit(1)

    if not mhdir.message:
        print('No messages')
        return

    if increment in {'prev', 'next'}:
        mhdir.message = db.prev_cur_next(mhdir.message)[increment]
    with mhdir.message.open('rb') as fp:
        message = message_from_binary_file(fp)
    print(parse.body(message))
def gauge_email_sentiment(file, input_dir, positives, negatives, header, analyzer):
    """Score one email and copy it into the positive or negative folder.

    The message ``file`` in ``input_dir`` is scored with
    ``analyzer.polarity_scores`` on "<subject> <plain-text body>".  The
    score is recorded in the ``header`` header and the message is written,
    under the same file name, to ``negatives`` when the compound score is
    below zero and to ``positives`` otherwise.
    """
    with open(os.path.join(input_dir, file), 'rb') as email_fp:
        msg = email.message_from_binary_file(email_fp, policy=default)

    # preferencelist must be a sequence of subtypes: ('plain') is just the
    # string 'plain', so the one-element tuple needs its trailing comma.
    msg_body = msg.get_body(preferencelist=('plain',))
    # A message without a text/plain part is scored on its subject alone.
    body_text = msg_body.get_content() if msg_body is not None else ''

    # We include the subject in scoring the message
    score = analyzer.polarity_scores(
        "{} {}".format(msg['subject'], body_text))
    # This would score without the subject
    # score = analyzer.polarity_scores(msg_body.get_content())

    # Put the scores in the envelope for later use
    msg.add_header(header, str(score))

    # Decide where to put the message
    output_path = negatives if score['compound'] < 0 else positives

    # Write the message out, using the same filename
    with open(os.path.join(output_path, file), "wb") as out_fp:
        out_fp.write(msg.as_bytes())
def parse(self):
    """Parse ``self.raw_message`` according to ``self.message_type``.

    Supported types are ``'string'``, ``'bytes'`` and ``'binary_file'``.
    Returns the parsed message with ``original_size`` set and an
    ``X-MailFrom`` header added.

    Raises ``ParseEmailException`` when parsing fails or the type is
    unknown, and ``DefectMessageException`` when the parser recorded
    defects.
    """
    parsers = {
        'string': email.message_from_string,
        'bytes': email.message_from_bytes,
        'binary_file': email.message_from_binary_file,
    }

    try:
        if self.message_type not in parsers:
            raise ValueError('Invalid message_type, could not parse message.')
        msg = parsers[self.message_type](self.raw_message, Message)
    except Exception:
        raise ParseEmailException

    # Do basic post-processing of the message, checking it for defects or
    # other missing information.
    if msg.defects:
        raise DefectMessageException

    # Add headers used in Holonet
    msg.original_size = len(self.raw_message)
    msg['X-MailFrom'] = self.mail_from

    return msg
def smart_parser(input):
    """
    Use the appropriate parser and return a email.message.Message object
    (this is not a L{PyzMessage} object)

    @type input: string, file, bytes, binary_file or email.message.Message
    @param input: the source of the message
    @rtype: email.message.Message
    @returns: the message
    @raise ValueError: if the type of C{input} is not supported
    """
    if isinstance(input, email.message.Message):
        return input

    if sys.version_info < (3, 0):
        # python 2.x
        if isinstance(input, basestring):
            return email.message_from_string(input)
        elif hasattr(input, 'read') and hasattr(input, 'readline'):
            return email.message_from_file(input)
        else:
            # The call form is valid on Python 2 and 3; the old
            # "raise E, msg" statement is a SyntaxError on Python 3.
            raise ValueError('input must be a string, a file or a Message')
    else:
        # python 3.x
        if isinstance(input, str):
            return email.message_from_string(input)
        elif isinstance(input, bytes):
            # python >= 3.2 only
            return email.message_from_bytes(input)
        elif hasattr(input, 'read') and hasattr(input, 'readline'):
            if hasattr(input, 'encoding'):
                # An ``encoding`` attribute marks a text-mode file.
                return email.message_from_file(input)
            else:
                # python >= 3.2 only
                return email.message_from_binary_file(input)
        else:
            raise ValueError(
                'input must be a string a bytes, a file or a Message')
def test_write_mime_filter(self):
    '''write_mime() with key filters.'''
    pr = problem_report.ProblemReport(date='now!')
    pr['GoodText'] = 'Hi'
    pr['BadText'] = 'YouDontSeeMe'
    pr['GoodBin'] = bin_data
    pr['BadBin'] = 'Y' + '\x05' * 10 + '-'

    # Named "out" rather than "io" so the stdlib module name is not shadowed.
    out = BytesIO()
    pr.write_mime(out, skip_keys=['BadText', 'BadBin'])
    out.seek(0)

    msg = email.message_from_binary_file(out)
    parts = list(msg.walk())
    self.assertEqual(len(parts), 3)

    # first part is the multipart container
    self.assertTrue(parts[0].is_multipart())

    # second part should be an inline text/plain attachment with all short
    # fields
    self.assertFalse(parts[1].is_multipart())
    self.assertEqual(parts[1].get_content_type(), 'text/plain')
    self.assertEqual(parts[1].get_content_charset(), 'utf-8')
    self.assertIsNone(parts[1].get_filename())
    # One "Key: value" per line; the skipped keys must not appear.
    self.assertEqual(parts[1].get_payload(decode=True),
                     b'ProblemType: Crash\n'
                     b'Date: now!\n'
                     b'GoodText: Hi\n')

    # third part should be the GoodBin: field as attachment
    self.assertFalse(parts[2].is_multipart())
    # The context manager closes the temp file even if the assertion fails.
    with tempfile.TemporaryFile() as f:
        f.write(parts[2].get_payload(decode=True))
        f.seek(0)
        self.assertEqual(gzip.GzipFile(mode='rb', fileobj=f).read(), bin_data)
def extract_mail_body(message):
    r'''
    Extract the plain text body of the message with signatures stripped off.

    Every ``text/plain`` part is decoded with its declared charset (or the
    default encoding when none is declared or the charset is unknown),
    passed through ``strip_signatures`` and the parts are joined with
    newlines.

    :param message: the message to extract the body from
    :type message: :class:`notmuch.Message`
    :returns: the extracted text body
    :rtype: :class:`str`
    '''
    filename = message.get_filename()
    if hasattr(email, 'message_from_binary_file'):
        # Close the file after parsing instead of leaking the handle.
        with open(filename, 'br') as fp:
            mail = email.message_from_binary_file(fp)
    else:
        if (3, 1) <= sys.version_info < (3, 2):
            fp = codecs.open(filename, 'r', 'utf-8', errors='replace')
        else:
            fp = open(filename)
        with fp:
            mail = email.message_from_file(fp)

    content = []
    for part in mail.walk():
        if part.get_content_type() == 'text/plain':
            raw_payload = part.get_payload(decode=True)
            encoding = part.get_content_charset()
            if encoding:
                try:
                    raw_payload = raw_payload.decode(encoding, 'replace')
                except LookupError:
                    # Unknown charset name: fall back to the default codec.
                    raw_payload = raw_payload.decode(
                        sys.getdefaultencoding(), 'replace')
            else:
                raw_payload = raw_payload.decode(
                    sys.getdefaultencoding(), 'replace')

            lines = raw_payload.split('\n')
            lines = strip_signatures(lines)

            content.append('\n'.join(lines))
    return '\n'.join(content)
def main():
    """Unpack the MIME message named on the command line into a directory.

    Every non-multipart part is written to the ``-d`` directory, which is
    created when missing.  A part's own filename is reduced to its final
    path component; a part with no usable filename gets a generated
    ``part-NNN<ext>`` name.
    """
    parser = ArgumentParser(description="""\
Unpack a MIME message into a directory of files.
""")
    parser.add_argument('-d', '--directory', required=True,
                        help="""Unpack the MIME message into the named
                        directory, which will be created if it doesn't already
                        exist.""")
    parser.add_argument('msgfile')
    args = parser.parse_args()

    with open(args.msgfile, 'rb') as fp:
        msg = email.message_from_binary_file(fp, policy=default)

    try:
        os.mkdir(args.directory)
    except FileExistsError:
        pass

    counter = 1
    for part in msg.walk():
        # multipart/* are just containers
        if part.get_content_maintype() == 'multipart':
            continue

        filename = part.get_filename()
        if filename:
            # The filename comes from the message and is untrusted: keep
            # only the last path component so it cannot point outside the
            # target directory (e.g. "../../etc/passwd").
            filename = os.path.basename(filename.replace('\\', '/'))
        if not filename or filename in ('.', '..'):
            ext = mimetypes.guess_extension(part.get_content_type())
            if not ext:
                # Use a generic bag-of-bits extension
                ext = '.bin'
            filename = 'part-%03d%s' % (counter, ext)
        counter += 1

        with open(os.path.join(args.directory, filename), 'wb') as out:
            out.write(part.get_payload(decode=True))
#!/usr/bin/python3
# Print "address<TAB>real name" for every To/From/Cc address of the mail
# read from standard input.
import sys
import email
import email.utils


def collect_addresses(stream):
    """Return ``(realname, address)`` pairs from the To, From and Cc headers.

    ``stream`` is a binary file holding one message.  Headers are visited
    in the order to, from, cc; a header that lists several addresses
    yields one pair per address.
    """
    message = email.message_from_binary_file(stream)
    pairs = []
    for hdr in ('to', 'from', 'cc'):
        # getaddresses() splits comma-separated address lists; parseaddr()
        # would only return the first address of each header.
        pairs.extend(email.utils.getaddresses(message.get_all(hdr, [])))
    return pairs


def main():
    """Read a mail from stdin and print one line per address found."""
    for realname, addr in collect_addresses(sys.stdin.buffer):
        # Tabs inside the name would break the two-column output.
        print('%s\t%s' % (addr, realname.replace('\t', ' ')))


if __name__ == '__main__':
    main()