Ejemplo n.º 1
0
    def handle(self, *args, **options):
        """Parse one email, read from a file or from stdin, then exit.

        The mail location is the first positional argument when one is
        given, otherwise ``options['infile']``; a falsy value means the mail
        is read from standard input.

        The process exits with status 0 when ``parse_mail`` returns a truthy
        result, and with status 1 when it returns a falsy result or raises.
        """
        infile = args[0] if args else options['infile']

        if infile:
            logger.info('Parsing mail loaded by filename')
            if six.PY3:
                with open(infile, 'rb') as file_:
                    mail = email.message_from_binary_file(file_)
            else:
                with open(infile) as file_:
                    mail = email.message_from_file(file_)
        else:
            logger.info('Parsing mail loaded from stdin')
            if six.PY3:
                mail = email.message_from_binary_file(sys.stdin.buffer)
            else:
                mail = email.message_from_file(sys.stdin)

        # Only the parsing call is guarded; the sys.exit() calls are kept
        # outside the try block so SystemExit never meets a handler.
        try:
            result = parse_mail(mail, options['list_id'])
        except Exception:
            logger.exception('Error when parsing incoming email',
                             extra={'mail': mail.as_string()})
            # Without this exit the method simply returned after logging,
            # so a crash in the parser was reported as success.
            sys.exit(1)

        if result:
            sys.exit(0)
        logger.warning('Failed to parse mail')
        sys.exit(1)
Ejemplo n.º 2
0
    def handle(self, *args, **options):
        """Load an email from a file or stdin and pass it to ``parse_mail``.

        The path comes from the first positional argument when present,
        otherwise from ``options['infile']``; a falsy path selects stdin.
        An ``AttributeError`` raised while loading is logged as a broken
        email and the method returns without parsing.
        """
        path = args[0] if args else options['infile']

        try:
            if not path:
                logger.info('Parsing mail loaded from stdin')
                mail = email.message_from_binary_file(sys.stdin.buffer)
            else:
                logger.info('Parsing mail loaded by filename')
                with open(path, 'rb') as stream:
                    mail = email.message_from_binary_file(stream)
        except AttributeError:
            logger.warning("Broken email ignored")
            return

        # Exit codes matter here: they separate real errors from expected
        # 'failures'.
        #
        # patch/comment parsed:        0
        # no parseable content found:  0
        # duplicate messages:          0
        # db integrity/other db error: 1
        # broken email (ValueError):   1 (this could be noisy, if it's an issue
        #                                 we could use a different return code)
        try:
            outcome = parse_mail(mail, options['list_id'])
            if outcome is None:
                logger.warning('Nothing added to database')
        except DuplicateMailError as duplicate:
            logger.warning('Duplicate mail for message ID %s', duplicate.msgid)
        except Exception as error:
            # ValueError derives from Exception, so this single clause also
            # covers the broken-email case listed above.
            logger.exception('Error when parsing incoming email: %s',
                             repr(error),
                             extra={'mail': mail.as_string()})
            sys.exit(1)
Ejemplo n.º 3
0
    def handle(self, *args, **options):
        """Read a single email (file or stdin), parse it and exit.

        The file name is the first positional argument if any, else
        ``options['infile']``; when that is falsy the mail is read from
        standard input.

        Exit status is 0 when ``parse_mail`` returns a truthy value and 1
        when it returns a falsy value or raises an exception.
        """
        infile = args[0] if args else options['infile']

        if infile:
            logger.info('Parsing mail loaded by filename')
            if six.PY3:
                with open(infile, 'rb') as file_:
                    mail = email.message_from_binary_file(file_)
            else:
                with open(infile) as file_:
                    mail = email.message_from_file(file_)
        else:
            logger.info('Parsing mail loaded from stdin')
            if six.PY3:
                mail = email.message_from_binary_file(sys.stdin.buffer)
            else:
                mail = email.message_from_file(sys.stdin)

        try:
            result = parse_mail(mail, options['list_id'])
        except Exception:
            logger.exception('Error when parsing incoming email',
                             extra={'mail': mail.as_string()})
            # Report the failure through the exit status as well; the
            # method used to return normally after logging the traceback.
            sys.exit(1)

        # Kept outside the try block: sys.exit() raises SystemExit, which
        # must propagate untouched.
        if result:
            sys.exit(0)
        logger.warning('Failed to parse mail')
        sys.exit(1)
Ejemplo n.º 4
0
    def test_write_mime_extra_headers(self):
        '''write_mime() with extra headers.'''

        report = problem_report.ProblemReport(date='now!')
        report['Simple'] = 'bar'
        report['TwoLine'] = 'first\nsecond\n'
        extra = {'Greeting': 'hello world', 'Foo': 'Bar'}
        buf = BytesIO()
        report.write_mime(buf, extra_headers=extra)
        buf.seek(0)

        msg = email.message_from_binary_file(buf)
        self.assertEqual(msg['Greeting'], 'hello world')
        self.assertEqual(msg['Foo'], 'Bar')
        parts = list(msg.walk())
        self.assertEqual(len(parts), 2)
        container, text_part = parts

        # the walk starts with the multipart container itself
        self.assertTrue(container.is_multipart())

        # then comes the inline text/plain attachment holding all short
        # fields
        self.assertFalse(text_part.is_multipart())
        self.assertEqual(text_part.get_content_type(), 'text/plain')
        self.assertIn(b'Simple: bar', text_part.get_payload(decode=True))
Ejemplo n.º 5
0
 def _process(self, ev):
     """Claim a spooled file, parse it and dispatch its raw content.

     ``ev`` must provide ``name`` and ``pathname``. Files whose name starts
     with "_" are ignored. Any other file is first moved to
     ``self.spool_dir + "_" + ev.name``; if the move fails a warning is
     logged and nothing else happens. The moved file is then read, parsed,
     and handed to ``dispatch`` together with the ``From`` header and the
     parsed ``To`` addresses. Parse errors and any other exception are
     logged, not raised.
     """
     # change name of the file asap, to minimize the probability of racing conditions
     # when processing across multiple instances of the app
     if ev.name.startswith("_"): return
     fn = self.spool_dir + "_" + ev.name
     try:
         shutil.move(ev.pathname, fn)
     except Exception as e:
         log.warning(">>>> possible MM4 file watcher racing condition: " + str(e))
         return
     # parse the file content to get the from and to addresses
     try:
         # Read the bytes once and parse from memory: parsing the file
         # object first left it at EOF, so the later fh.read() handed an
         # empty payload to dispatch().
         with open(fn, "rb") as fh:
             raw = fh.read()
         msg = email.message_from_bytes(raw)
         dispatch(raw,
             msg.get('from'),
             email.utils.getaddresses(msg.get_all('to'))
         )
     except email.errors.MessageParseError as me:
         # `fn` is the spooled path; the name used here before was undefined.
         log.warning(">>>> MM4 file watcher failed to parse {}: {}"
             .format(fn, me)
         )
     except Exception as e:
         log.debug(traceback.format_exc())
         log.warning(">>>> MM4 file watcher failed: {}".format(e))
Ejemplo n.º 6
0
    def setFromDraft(self, draft_id):
        """Fill the compose widgets from the stored draft ``draft_id``.

        The draft is looked up in the database, its file is parsed with the
        default email policy, and the subject, recipients, plain-text body
        and attachment names are copied into the corresponding widgets.
        """
        with open_db() as db:
            stored = db.find_message(draft_id)
            with open(stored.get_filename(), 'rb') as handle:
                draft = email.message_from_binary_file(
                    handle, policy=email.policy.default)

        self.msg = draft
        self.draft_id = draft_id

        self.subjectEdit.setText(draft['Subject'])

        to_addresses = getaddresses(draft.get_all('To', []))
        cc_addresses = getaddresses(draft.get_all('Cc', []))
        self.rcptEdit.set_recipients(to=to_addresses, cc=cc_addresses)

        plain_part = draft.get_body(('plain', ))
        if plain_part is not None:
            self.messageEdit.setPlainText(plain_part.get_content())

        for part in draft.iter_attachments():
            label = part.get_filename() or 'untitled.attachment'
            self.attachmentsList.addItem(label)
            # NOTE(review): items go to `attachmentsList` but `show()` is
            # called on `attachmentList` -- one of the two names looks like
            # a typo; confirm against the widget definitions.
            self.attachmentList.show()
Ejemplo n.º 7
0
    def setReply(self, reply_to, reply_all):
        """Prepare the compose widgets for a reply to message ``reply_to``.

        Reply headers, subject and recipients come from
        ``self._get_reply_from_cli``. The plain-text body of the original
        message, when it has one, is parsed, passed block by block through
        ``indent_recursive`` and placed in the message editor.
        """
        assert reply_to

        info = self._get_reply_from_cli(reply_to, reply_all)
        for name in ('In-reply-to', 'References'):
            if name not in info:
                continue
            self.msg[name] = info[name]

        self.subjectEdit.setText(info['Subject'])

        to_addresses = getaddresses([info.get('To', '')])
        cc_addresses = getaddresses([info.get('Cc', '')])
        self.rcptEdit.set_recipients(to=to_addresses, cc=cc_addresses)

        with open_db() as db:
            stored = db.find_message(reply_to)
            with open(stored.get_filename(), 'rb') as handle:
                original = email.message_from_binary_file(
                    handle, policy=email.policy.default)

        plain_part = original.get_body(('plain', ))
        if plain_part is None:
            return

        blocks = Parser().parse(plain_part.get_content())
        for block in blocks:
            indent_recursive(block)
        self.messageEdit.setPlainText(to_text(blocks))
Ejemplo n.º 8
0
def test_actual_emails(eml: Path, monkeypatch: pytest.MonkeyPatch) -> None:
    """Compare ``email2dict`` output for ``eml`` with its expectation module.

    The expected values are read from the ``data`` and ``data_all``
    attributes of a module named after the file stem, imported with
    ``EMAIL_DIR`` prepended to ``sys.path``.
    """
    with eml.open("rb") as stream:
        parsed = email.message_from_binary_file(stream, policy=policy.default)
    monkeypatch.syspath_prepend(EMAIL_DIR)
    expected: Any = import_module(eml.stem)
    assert email2dict(parsed) == expected.data
    assert email2dict(parsed, include_all=True) == expected.data_all
Ejemplo n.º 9
0
def read_email(email_path):
    """Read an email file into a flat dictionary.

    The returned dict contains:

    * ``original_msg`` -- the file content as text; the file is first read
      with the default encoding and, if that raises ``UnicodeDecodeError``,
      re-read with the encoding reported by ``UniversalDetector``;
    * ``payload`` -- ``msg.get_payload()`` of the parsed message;
    * ``text`` -- ``parse_html_payload`` applied to that payload;
    * one entry per header returned by ``msg.items()``;
    * ``Date`` -- replaced by its ISO format when it can be converted.

    Failures while copying headers or converting the date are logged and
    do not abort the function.
    """
    result = {}
    with open(email_path, 'rb') as fp:
        msg = email.message_from_binary_file(fp, policy=default)
    try:
        with open(email_path) as f:
            original = f.read()
    except UnicodeDecodeError:
        # The detector is only needed on this fallback path.
        detector = UniversalDetector()
        detector.reset()
        with open(email_path, 'rb') as f:
            # Iterate the file lazily so detection can stop early without
            # loading every line into memory first.
            for line in f:
                detector.feed(line)
                if detector.done:
                    break
        detector.close()
        encoding = detector.result['encoding']
        with open(email_path, encoding=encoding) as f:
            original = f.read()
    result['original_msg'] = original
    result['payload'] = msg.get_payload()
    result['text'] = parse_html_payload(result['payload'])
    try:
        for key, value in msg.items():
            result[key] = value
    except Exception as e:
        logger.error('Problem parsing email: {}\n{}'.format(email_path, e))
    try:
        result['Date'] = dateparser.parse(result['Date']).isoformat()
    except Exception as e:
        # Report the value that failed to convert: it lives under 'Date';
        # the lowercase key looked up here before is a different entry.
        logger.error('Problem converting date: {}\n{}'.format(
            result.get('Date'), e))
    return result
Ejemplo n.º 10
0
def parse_mail(fileobj):
    """Parse a binary mail stream into ``(headers, body)``.

    ``headers`` maps each header name to its value after
    ``quoted_printable``, with newlines removed, tabs turned into spaces
    and surrounding whitespace stripped. ``body`` is the list of body
    lines without their newline, with quoted-printable soft line breaks
    merged and a trailing ``=20`` turned into a space.
    """
    msg = email.message_from_binary_file(fileobj)

    headers = {
        name: quoted_printable(value).replace('\n', '').replace('\t', ' ').strip()
        for name, value in msg.items()
    }

    body = [raw.replace("\n", "")
            for raw in email.iterators.body_line_iterator(msg)]

    # Merge lines joined with "=\n": walk backwards from the second-to-last
    # line so a merge never shifts an index that is still to be visited.
    for idx in range(len(body) - 2, -1, -1):
        current = body[idx]
        if len(current) == 74 and current.endswith('='):
            body[idx] = current[:-1] + body.pop(idx + 1)

    # Remove =20 from end of lines
    body = [text[:-3] + ' ' if text.endswith('=20') else text
            for text in body]

    return headers, body
Ejemplo n.º 11
0
 def eom(self):
     """Rewind and parse ``self.fp``, add one recipient, accept the mail."""
     self.fp.seek(0)
     parsed = email.message_from_binary_file(self.fp)
     # many milter functions can only be called from eom()
     # example of adding a Bcc:
     recipient = '<%s>' % '*****@*****.**'
     self.addrcpt(recipient)
     return Milter.ACCEPT
Ejemplo n.º 12
0
def pyatl():
    """Handle a JSON notification posted in ``request.data``.

    When the payload has a ``SubscribeURL`` key, that URL is fetched and an
    empty string is returned. Otherwise the ``Message`` field is decoded,
    the referenced S3 object is downloaded into a temporary file, parsed as
    an email, and the decoded payload of every MIME part that has a file
    name is printed. Always returns an empty string.
    """
    data = json.loads(request.data)

    if 'SubscribeURL' in data:
        url = data['SubscribeURL']
        print('Subscribing to topic at {}'.format(url))
        requests.get(url)
        return ''

    message = json.loads(data['Message'])
    action = message['receipt']['action']
    bucket_name = action['bucketName']
    object_key = action['objectKey']
    sender = message['mail']['source']
    url = 's3://{}/{}'.format(bucket_name, object_key)
    print('new message from {} at {}'.format(sender, url))

    with tempfile.NamedTemporaryFile() as scratch:
        print('downloading message to {}'.format(scratch.name))
        s3 = boto3.resource('s3')

        try:
            s3.Bucket(bucket_name).download_file(object_key, scratch.name)
        except botocore.exceptions.ClientError as error:
            if error.response['Error']['Code'] != "404":
                raise
            # A missing object is only reported; the (empty) temporary
            # file is still parsed below.
            print("The object does not exist.")

        with open(scratch.name, 'rb') as stream:
            parsed = email.message_from_binary_file(stream, policy=default)

        for part in parsed.walk():
            filename = part.get_filename()
            if filename:
                print('found mime part {}'.format(filename))
                payload = part.get_payload(decode=True)
                print(payload.decode())
    return ''
Ejemplo n.º 13
0
    def test_write_mime_order(self):
        '''write_mime() with keys ordered.'''

        pr = problem_report.ProblemReport(date='now!')
        pr['SecondText'] = 'What'
        pr['FirstText'] = 'Who'
        pr['FourthText'] = 'Today'
        pr['ThirdText'] = "I Don't Know"
        io = BytesIO()
        pr.write_mime(io, priority_fields=['FirstText', 'SecondText',
                                           'ThirdText', 'Unknown', 'FourthText'])
        io.seek(0)

        msg = email.message_from_binary_file(io)
        parts = [p for p in msg.walk()]
        self.assertEqual(len(parts), 2)

        # first part is the multipart container
        self.assertTrue(parts[0].is_multipart())

        # second part should be an inline text/plain attachments with all short
        # fields
        self.assertTrue(not parts[1].is_multipart())
        self.assertEqual(parts[1].get_content_type(), 'text/plain')
        self.assertEqual(parts[1].get_content_charset(), 'utf-8')
        self.assertEqual(parts[1].get_filename(), None)
        self.assertEqual(parts[1].get_payload(decode=True), b'''FirstText: Who
SecondText: What
ThirdText: I Don't Know
FourthText: Today
ProblemType: Crash
Date: now!
''')
Ejemplo n.º 14
0
def test_prepare_metadata_for_editable(tmp_path: Path) -> None:
    """Check the editable metadata of ``demo-package`` requires ``editables``."""
    with build_fixture_project("demo-package"):
        dist_info = api.prepare_metadata_for_build_editable(tmp_path.as_posix())
        assert dist_info == "demo_package-0.1.0.dist-info"
        metadata_path = tmp_path / dist_info / "METADATA"
        with metadata_path.open("rb") as stream:
            parsed = email.message_from_binary_file(stream)
        requirements = parsed.get_all("Requires-Dist")
        assert "editables" in requirements
Ejemplo n.º 15
0
Archivo: tests.py Proyecto: 6ft/django
    def test_file_sessions(self):
        """Make sure opening a connection creates a new file"""
        def spooled():
            # Current content of the directory the test inspects.
            return os.listdir(self.tmp_dir)

        msg = EmailMessage(
            'Subject', 'Content', '*****@*****.**', ['*****@*****.**'],
            headers={'From': '*****@*****.**'})
        connection = mail.get_connection()
        connection.send_messages([msg])

        self.assertEqual(len(spooled()), 1)
        first_file = os.path.join(self.tmp_dir, spooled()[0])
        with open(first_file, 'rb') as fp:
            message = message_from_binary_file(fp)
        self.assertEqual(message.get_content_type(), 'text/plain')
        self.assertEqual(message.get('subject'), 'Subject')
        self.assertEqual(message.get('from'), '*****@*****.**')
        self.assertEqual(message.get('to'), '*****@*****.**')

        # a second connection writes a second file ...
        connection2 = mail.get_connection()
        connection2.send_messages([msg])
        self.assertEqual(len(spooled()), 2)

        # ... while sending again on the first one does not add a file
        connection.send_messages([msg])
        self.assertEqual(len(spooled()), 2)

        msg.connection = mail.get_connection()
        self.assertTrue(connection.open())
        msg.send()
        self.assertEqual(len(spooled()), 3)
        msg.send()
        self.assertEqual(len(spooled()), 3)

        connection.close()
Ejemplo n.º 16
0
def parse_from_file(path):
    """Parse a single message from a file, then delete the file.

    :param path: The full path to the email file.
    :type path: str
    :return: The result of ``extract_mail_values`` for the parsed message,
        or None when the file is not readable and writable.
    """
    # Nothing is written to the file, but it has to be removable afterwards,
    # hence the check for write access as well.
    if not os.access(path, os.R_OK | os.W_OK):
        log.warning("Cannot access in 'rw' mode the file at '%s'", path)
        return None

    with io.open(path, mode="rb") as handle:
        message = email.message_from_binary_file(handle)

    values = extract_mail_values(message)

    try:
        os.unlink(path)
    except PermissionError:
        log.error("Error removing file at '%s'", path)

    return values
Ejemplo n.º 17
0
def parse_mail(fileobj):
    """Return ``(headers, body)`` for the mail read from ``fileobj``.

    Header values go through ``quoted_printable`` and are flattened
    (newlines dropped, tabs replaced by spaces, whitespace stripped).
    Body lines lose their newline; a 74-character line ending in ``=`` is
    joined with the line that follows it, and a trailing ``=20`` becomes a
    single space.
    """
    msg = email.message_from_binary_file(fileobj)

    headers = {}
    for key, raw_value in msg.items():
        cleaned = quoted_printable(raw_value).replace('\n', '')
        headers[key] = cleaned.replace('\t', ' ').strip()

    body = [entry.replace("\n", "")
            for entry in email.iterators.body_line_iterator(msg)]

    # Merge lines joined with "=\n", visiting indices from the
    # second-to-last line down to the first.
    for pos in reversed(range(len(body) - 1)):
        if len(body[pos]) == 74 and body[pos].endswith('='):
            follower = body.pop(pos + 1)
            body[pos] = body[pos][:-1] + follower

    # Remove =20 from end of lines
    for pos, entry in enumerate(body):
        if entry.endswith('=20'):
            body[pos] = entry[:-3] + ' '

    return headers, body
Ejemplo n.º 18
0
 def _process_multipart_body(self, mimeinput, charset):
     """Read one multipart/form-data part and store it in ``self.form``.

     The header lines of the part are collected up to and including the
     first blank line and parsed as a message. A ``charset`` parameter on
     the part's Content-Type overrides the ``charset`` argument. A part
     with a ``filename`` is received into an ``Upload``; any other part is
     decoded to a string. Either value is added to ``self.form`` under the
     part's ``name``.

     Raises ``RequestError`` when the Content-Disposition header is
     missing, is not ``form-data``, or has no ``name`` parameter.
     """
     header_buf = io.BytesIO()
     # NOTE(review): the code below relies on `lines` resuming after the
     # header loop, i.e. presumably a one-shot iterator -- confirm against
     # readpart().
     lines = mimeinput.readpart()
     for line in lines:
         header_buf.write(line)
         if line == b'\r\n':
             break
     header_buf.seek(0)
     headers = email.message_from_binary_file(header_buf)
     ctype, ctype_params = parse_header(headers.get('content-type', ''))
     if ctype and 'charset' in ctype_params:
         charset = ctype_params['charset']
     cdisp, cdisp_params = parse_header(
         headers.get('content-disposition', ''))
     if not cdisp:
         raise RequestError('expected Content-Disposition header')
     name = cdisp_params.get('name')
     filename = cdisp_params.get('filename')
     if not (cdisp == 'form-data' and name):
         # The trailing space keeps the two adjacent literals from fusing
         # into "form-datawith" in the message.
         raise RequestError('expected Content-Disposition: form-data '
                            'with a "name" parameter: got %r' %
                            headers.get('content-disposition', ''))
     # FIXME: should really to handle Content-Transfer-Encoding and other
     # MIME complexity here.  See RFC2048 for the full horror story.
     if filename:
         # it might be large file upload so use a temporary file
         upload = Upload(filename, ctype, charset)
         upload.receive(lines)
         _add_field_value(self.form, name, upload)
     else:
         value = _decode_string(b''.join(lines), charset or self.charset)
         _add_field_value(self.form, name, value)
Ejemplo n.º 19
0
def get_time(path):
    """Return ``time_from_date`` of the ``HDR_DATE`` header of the mail at ``path``.

    Raises ``BadTime`` when that header is missing or empty.
    """
    with open(path, 'rb') as source:
        message = message_from_binary_file(source)
        date_value = message[HDR_DATE]
        if not date_value:
            raise BadTime()
        return time_from_date(date_value)
Ejemplo n.º 20
0
 def handle(self, *args, **options):
     """Report, and optionally delete, incoming letters with repeated Message-IDs.

     For every case of the monitoring selected by
     ``options["monitoring_pk"]``, each incoming letter with an ``eml`` file
     is parsed. The first letter seen with a given ``Message-ID`` within a
     case is kept; later letters with the same id are reported and deleted
     only when ``options["delete"]`` is truthy.
     """
     monitoring = Monitoring.objects.get(pk=options["monitoring_pk"])
     for case in monitoring.case_set.all():
         seen_ids = set()
         incoming = Letter.objects.filter(
             record__case=case.pk).is_incoming().all()
         for letter in incoming:
             self.stdout.write("Processing letter: {}".format(letter.pk))
             if not letter.eml:
                 self.stdout.write("Skipping {} due missing eml.".format(
                     letter.pk))
                 continue

             raw = letter.eml.file.read()
             stream = BytesIO(raw)
             # Content without a "Subject:" marker is read through gzip.
             if b"Subject:" not in raw:
                 stream = gzip.GzipFile(fileobj=stream)
             msg_id = email.message_from_binary_file(stream).get("Message-ID")

             if not msg_id:
                 self.stdout.write(
                     "Skipping {} due missing 'Message-ID'.".format(
                         letter.pk))
                 continue

             if msg_id in seen_ids:
                 self.stdout.write(
                     "Removing {} due duplicated 'Message-ID': {}".format(
                         letter.pk, msg_id))
                 if options["delete"]:
                     letter.delete()
             else:
                 self.stdout.write(
                     "Skipping {} due unique 'Message-ID': {}".format(
                         letter.pk, msg_id))
                 seen_ids.add(msg_id)
Ejemplo n.º 21
0
def large_email():
    """A large email with several images as attachments"""
    sample = TESTS_HERE / "data/1604232551.M582355P21675Q3.hubhero-demo"
    with open(sample, "rb") as stream:
        parsed = email.message_from_binary_file(stream, policy=default)

    yield parsed
Ejemplo n.º 22
0
def parse_from_file(path):
    """Read one email from ``path``, extract its values and remove the file.

    :param path: The full path to the email file.
    :type path: str
    :return: Whatever ``extract_mail_values`` returns for the message, or
        None if the file cannot be accessed for reading and writing.
    """
    accessible = os.access(path, os.R_OK | os.W_OK)
    if not accessible:
        log.warning("Cannot access in 'rw' mode the file at '%s'", path)
        return None

    # The file is only read, but write access was required above because
    # it gets removed once parsed.
    with io.open(path, mode="rb") as source:
        parsed = email.message_from_binary_file(source)

    data = extract_mail_values(parsed)

    try:
        os.unlink(path)
    except PermissionError:
        log.error("Error removing file at '%s'", path)

    return data
Ejemplo n.º 23
0
def parse_mail(file_in):
    """
        Extract Subject & Body of mail file
        headers must be formatted as a block of RFC 2822 style

        input:  file path
        output: dict with the keys "body", "subject", "encoding" and
                "name", after passing through dc_remove_adresses and
                dc_remove_url

        The body is decoded with the charset declared by the message. When
        no charset is declared it is decoded as UTF-8 with undecodable
        bytes replaced, instead of failing.
    """

    # We open the file and then divide it in different parts.
    with open(file_in, 'rb') as INFILE:
        raw_mail = email.message_from_binary_file(INFILE)

    # get_charsets() yields None for a part without a charset parameter;
    # passing that straight to bytes.decode() raises TypeError.
    charset = raw_mail.get_charsets()[0]
    payload = raw_mail.get_payload(decode=True)
    if charset is None:
        body = payload.decode('utf-8', errors='replace')
    else:
        body = payload.decode(charset)

    subject = str(email.header.make_header(
        email.header.decode_header(raw_mail["Subject"])))

    formated_mail = {
        "body": body,
        "subject": subject,
        "encoding": raw_mail['content-type'],
    }

    # The name is "<last directory component>-<file name without extension>".
    date = os.path.dirname(file_in).split('/').pop() + '-'
    name = os.path.splitext(os.path.basename(file_in))[0]
    formated_mail['name'] = date+name

    formated_mail = dc_remove_adresses(formated_mail)
    formated_mail = dc_remove_url(formated_mail)

    return formated_mail
 def test_bogus_content_charset(self):
     """The parsed sample must serialise back to its ASCII-decoded source."""
     with path('mailman.email.tests.data', 'bad_email_3.eml') as sample, \
             open(str(sample), 'rb') as stream:
         parsed = message_from_binary_file(stream, Message)
         stream.seek(0)
         raw = stream.read()
     expected = raw.decode('ascii', 'replace')
     self.assertEqual(parsed.as_string(), expected)
Ejemplo n.º 25
0
def parse_blob_info(field_storage):
    """Build a BlobInfo record from an uploaded file's field storage.

    Args:
      field_storage: object exposing ``name``, ``disposition_options``,
        ``type_options`` and ``file`` for the uploaded blob, or None.

    Returns:
      A BlobInfo built from the values found in ``field_storage``, or None
      when ``field_storage`` is None.

    Raises:
      BlobInfoParseError: when a required value is missing, the size is not
        an integer, or the creation string cannot be parsed.
    """
    if field_storage is None:
        return None

    field_name = field_storage.name

    def require(mapping, key):
        # Fetch `key` from `mapping`, turning a missing value into a
        # parse error that names the offending field.
        found = mapping.get(key, None)
        if found is None:
            raise BlobInfoParseError('Field %s has no %s.' %
                                     (field_name, key))
        return found

    filename = require(field_storage.disposition_options, 'filename')
    blob_key_str = require(field_storage.type_options, 'blob-key')
    blob_key = BlobKey(blob_key_str)

    # The upload body is itself a block of headers describing the blob.
    parse = (email.message_from_file if six.PY2
             else email.message_from_binary_file)
    upload_content = parse(field_storage.file)

    content_type = require(upload_content, 'content-type')
    raw_size = require(upload_content, 'content-length')
    creation_string = require(upload_content, UPLOAD_INFO_CREATION_HEADER)
    md5_hash_encoded = require(upload_content, 'content-md5')
    md5_hash = base64.urlsafe_b64decode(md5_hash_encoded)

    try:
        size = int(raw_size)
    except (TypeError, ValueError):
        raise BlobInfoParseError('%s is not a valid value for %s size.' %
                                 (raw_size, field_name))

    try:
        creation = blobstore._parse_creation(creation_string, field_name)
    except blobstore._CreationFormatError as err:
        raise BlobInfoParseError(str(err))

    return BlobInfo(
        id=blob_key_str,
        content_type=content_type,
        creation=creation,
        filename=filename,
        size=size,
        md5_hash=md5_hash,
    )
Ejemplo n.º 26
0
    def timerEvent(self, ev):
        """Build and store the excerpt of the next queued message.

        Events from other timers are forwarded to the parent class. For
        this object's timer, one ``(message_id, filename)`` pair is taken
        from the queue, the timer is stopped once the queue is empty, and
        an excerpt of at most 100 characters is saved as a message property
        and announced through ``builtExcerpt``.
        """
        if ev.timerId() != self.timer.timerId():
            super(ExcerptBuilder, self).timerEvent(ev)
            return

        message_id, filename = self.queue.popleft()

        if not self.queue:
            self.timer.stop()

        if not hasattr(notmuch.Message, 'add_property'):
            # FIXME remove condition when function is integrated in notmuch bindings
            return

        with open(filename, 'rb') as handle:
            parsed = email.message_from_binary_file(
                handle, policy=email.policy.default)

        excerpt = self._getExcerptPlainText(parsed)
        if excerpt is None:
            # No plain-text excerpt: fall back to HTML when it is supported.
            if not HAS_HTML2TEXT:
                return
            excerpt = self._getExcerptHtml(parsed)

        # Collapse whitespace runs and keep the first 100 characters.
        excerpt = re.sub(r'\s+', ' ', excerpt or '')[:100]

        with open_db_rw() as db:
            stored = db.find_message(message_id)
            stored.add_property(self.PROPERTY, excerpt)
            self.builtExcerpt.emit(message_id, excerpt)
Ejemplo n.º 27
0
    def test_write_mime_extra_headers(self):
        '''write_mime() with extra headers.'''

        report = problem_report.ProblemReport(date='now!')
        report['Simple'] = 'bar'
        report['TwoLine'] = 'first\nsecond\n'
        stream = BytesIO()
        report.write_mime(
            stream, extra_headers={'Greeting': 'hello world', 'Foo': 'Bar'})
        stream.seek(0)

        msg = email.message_from_binary_file(stream)
        for header, expected in (('Greeting', 'hello world'),
                                 ('Foo', 'Bar')):
            self.assertEqual(msg[header], expected)
        parts = list(msg.walk())
        self.assertEqual(len(parts), 2)
        outer, inner = parts

        # the multipart container is yielded first
        self.assertTrue(outer.is_multipart())

        # the second part is the inline text/plain attachment with all
        # short fields
        self.assertFalse(inner.is_multipart())
        self.assertEqual(inner.get_content_type(), 'text/plain')
        self.assertIn(b'Simple: bar', inner.get_payload(decode=True))
Ejemplo n.º 28
0
 def test_simple_utf8_file(self):
     """extract_body() returns the decoded text of the UTF-8 sample mail."""
     # Use a context manager so the sample file is closed after parsing
     # instead of being left open for the rest of the test run.
     with open('tests/static/mail/utf8.eml', 'rb') as fp:
         mail = email.message_from_binary_file(
             fp, _class=email.message.EmailMessage)
     actual = utils.extract_body(mail)
     expected = "Liebe Grüße!\n"
     self.assertEqual(actual, expected)
Ejemplo n.º 29
0
def message2email(msg: Message) -> EmailMessage:
    """
    Return ``msg`` as an `EmailMessage` parsed with the ``default`` policy.

    An `EmailMessage` is returned as-is. Any other `Message` (including
    `mailbox` message classes) is flattened to bytes and parsed again; for
    `MMDFMessage` and `mboxMessage` the "From " line is carried over.
    """
    if isinstance(msg, EmailMessage):
        return msg

    # Message.as_bytes() refolds long header lines (which can change
    # whitespace after reparsing) and offers no switch for that, hence the
    # hand-driven BytesGenerator.
    # TODO: Instead of maxheaderlen, use a policy with refold_source=None?
    buffer = BytesIO()
    has_unixfrom = msg.get_unixfrom() is not None
    BytesGenerator(buffer, mangle_from_=False, maxheaderlen=0).flatten(
        msg, unixfrom=has_unixfrom)
    buffer.seek(0)

    converted = email.message_from_binary_file(buffer, policy=policy.default)
    assert isinstance(converted, EmailMessage)

    # MMDFMessage and mboxMessage expose their "From " line through
    # get_from() rather than get_unixfrom(), so copy it across by hand.
    if isinstance(msg, (MMDFMessage, mboxMessage)):
        converted.set_unixfrom("From " + msg.get_from())
    return converted
Ejemplo n.º 30
0
def decode_email(eml_file: str,
                 include_raw_body: bool = False,
                 include_attachment_data: bool = False,
                 pconf: typing.Optional[dict] = None,
                 policy: email.policy.Policy = email.policy.default) -> dict:
    """Parse the EML file at ``eml_file`` and return ``parse_email``'s result.

    The file is opened in binary mode, parsed with ``policy`` and the
    resulting message is passed to ``parse_email`` together with the other
    arguments.

    Args:
      eml_file (str): Path of the file to be parsed.
      include_raw_body (bool, optional): Forwarded to ``parse_email``.
                                         Default is False.
      include_attachment_data (bool, optional): Forwarded to ``parse_email``.
                                                Default is False.
      pconf (dict, optional): Optional configuration forwarded to
                              ``parse_email``.
      policy (email.policy.Policy, optional): Policy to use when parsing the
            e-mail. Default = email.policy.default.

    Returns:
      dict: The value returned by ``parse_email``.
    """
    with open(eml_file, 'rb') as stream:
        parsed = email.message_from_binary_file(stream, policy=policy)

    return parse_email(parsed, include_raw_body, include_attachment_data,
                       pconf)
Ejemplo n.º 31
0
    def test_file_sessions(self):
        """Make sure opening a connection creates a new file"""
        def file_count():
            # Number of entries currently in the directory under test.
            return len(os.listdir(self.tmp_dir))

        msg = EmailMessage('Subject', 'Content', '*****@*****.**',
                           ['*****@*****.**'],
                           headers={'From': '*****@*****.**'})
        connection = mail.get_connection()
        connection.send_messages([msg])

        self.assertEqual(file_count(), 1)
        only_file = os.listdir(self.tmp_dir)[0]
        with open(os.path.join(self.tmp_dir, only_file), 'rb') as fp:
            message = message_from_binary_file(fp)
        self.assertEqual(message.get_content_type(), 'text/plain')
        for header, expected in (('subject', 'Subject'),
                                 ('from', '*****@*****.**'),
                                 ('to', '*****@*****.**')):
            self.assertEqual(message.get(header), expected)

        # a fresh connection produces a second file
        connection2 = mail.get_connection()
        connection2.send_messages([msg])
        self.assertEqual(file_count(), 2)

        # sending again through the first connection adds none
        connection.send_messages([msg])
        self.assertEqual(file_count(), 2)

        msg.connection = mail.get_connection()
        self.assertTrue(connection.open())
        msg.send()
        self.assertEqual(file_count(), 3)
        msg.send()
        self.assertEqual(file_count(), 3)

        connection.close()
Ejemplo n.º 32
0
 def __init__(self, filepath, category=None):
     """Store ``filepath`` and ``category`` and parse the mail into ``self.mail``."""
     self.filepath = filepath
     self.category = category
     # NOTE(review): despite its name, `filepath` is handed directly to the
     # email parsers, which read from a file object -- confirm what callers
     # pass here.
     on_py3 = sys.version_info > (3, 0)
     parse = email.message_from_binary_file if on_py3 else email.message_from_file
     self.mail = parse(filepath)
Ejemplo n.º 33
0
def unsubscribe_message():
    """
    Pytest fixture returning the parsed unsubscribe email
    """
    with open("./test_data/unsubscribe_email.eml", "rb") as stream:
        parsed = email.message_from_binary_file(stream)
    return parsed
Ejemplo n.º 34
0
def message_with_code():
    """
    Pytest fixture returning the parsed email from code_email.eml
    """
    with open("./test_data/code_email.eml", "rb") as stream:
        parsed = email.message_from_binary_file(stream)
    return parsed
Ejemplo n.º 35
0
def message_with_one_attachment():
    """
    Pytest fixture returning the parsed email from pdf_one_email.eml
    """
    with open("./test_data/pdf_one_email.eml", "rb") as stream:
        parsed = email.message_from_binary_file(stream)
    return parsed
Ejemplo n.º 36
0
def from_file(fileobj):
    """Parse an email from *fileobj* and return ``from_message`` of the result.

    The text parser is tried first; if it raises ``TypeError`` the stream
    is rewound and parsed again with the binary parser.
    """
    try:
        parsed = email.message_from_file(fileobj)
    except TypeError:
        # Rewind before retrying: the failed attempt may have consumed data.
        fileobj.seek(0)
        parsed = email.message_from_binary_file(fileobj)
    return from_message(parsed)
Ejemplo n.º 37
0
def load_mail(file_path):
    """Parse the message stored at *file_path* and return it.

    Python 3 reads the file in binary mode; Python 2 reads it as text.
    """
    if not six.PY3:
        with open(file_path) as text_file:
            return email.message_from_file(text_file)
    with open(file_path, 'rb') as binary_file:
        return email.message_from_binary_file(binary_file)
Ejemplo n.º 38
0
 def __init__(self, infile, category=None):
   """Store the category and parse *infile* into ``self.mail``.

   *infile* is passed directly to the ``email`` parser: the binary-file
   parser on Python 3, the text-file parser on Python 2.
   """
   self.category = category
   running_py3 = sys.version_info > (3, 0)
   parse = (email.message_from_binary_file if running_py3
            else email.message_from_file)
   self.mail = parse(infile)
def parse_message_id(file_):
    '''Return the message id for a given binary file object.

    It is assumed that the file represents a valid RFC822 message.
    The surrounding angle brackets are removed; an empty string is
    returned when the message has no ``Message-ID`` header.
    '''
    msg = email.message_from_binary_file(file_)
    if "Message-ID" not in msg:
        return ""
    # A folded header ("Message-ID:\n <id>") or trailing blanks leave
    # whitespace around the value, which would stop strip("<>") from
    # reaching the brackets -- trim whitespace first.
    return msg["Message-ID"].strip().strip("<>")
Ejemplo n.º 40
0
def load_mail(file_path):
    """Return the email message parsed from the file at *file_path*.

    The file is opened in binary mode on Python 3 and in text mode
    otherwise, matching the parser available on each version.
    """
    if six.PY3:
        mode, parse = 'rb', email.message_from_binary_file
        with open(file_path, mode) as handle:
            return parse(handle)
    with open(file_path) as handle:
        return email.message_from_file(handle)
Ejemplo n.º 41
0
 def message(self):
     """ Parse the mail file at ``self.path`` and return the message. """
     logger.debug("Parsing mail at {} ...".format(self.path))
     with open(self.path, 'rb') as handle:
         # The file is opened in binary mode on both Python versions;
         # only the parser entry point differs.
         parse = (email.message_from_file if PY2
                  else email.message_from_binary_file)
         return parse(handle)
Ejemplo n.º 42
0
def get_msgs_from_maildir(maildir, cachefile):
    """Parse every entry of *maildir* and return the messages as a list.

    The list is also handed to ``set_cache`` together with *cachefile*
    before it is returned.
    """
    mails = []
    for entry in os.listdir(maildir):
        entry_path = os.path.join(maildir, entry)
        with open(entry_path, 'rb') as handle:
            parsed = email.message_from_binary_file(handle)
        mails.append(parsed)  # TODO: insort()

    set_cache(cachefile, mails)
    return mails
Ejemplo n.º 43
0
def email_html(file):
  """Return the quoted-printable-decoded body of the email stored at *file*.

  For a multipart message the raw payload of the first part is used;
  otherwise the transfer-decoded payload of the message itself. The
  result is passed through ``quopri.decodestring`` and decoded to ``str``
  with the default codec.
  """
  # Close the file as soon as parsing is done instead of leaking the handle.
  with open(file, "rb") as infile:
    message = email.message_from_binary_file(infile)
  if message.is_multipart():
    # Only the first part is of interest.
    for part in message.get_payload():
      body = part.get_payload()
      break
  else:
    body = message.get_payload(decode=True)
  return quopri.decodestring(body).decode()
Ejemplo n.º 44
0
def message_from_binary_file(fp, *args, **kws):
    """
    Parse a binary file with C{email.message_from_binary_file} and wrap the
    result in a L{PyzMessage}. Extra positional and keyword arguments are
    forwarded to the standard library parser.
    B{(Python >= 3.2)}
    @type fp: binary_file
    @param fp: the input file, must be open in binary mode
    @rtype: L{PyzMessage}
    @return: the L{PyzMessage} object
    """
    parsed = email.message_from_binary_file(fp, *args, **kws)
    return PyzMessage(parsed)
Ejemplo n.º 45
0
    def handle(self, *args, **options):
        """Parse one mail, from a file or from stdin, with ``parse_mail``.

        The input file is the first positional argument or, failing that,
        ``options['infile']``; when neither is set the mail is read from
        standard input. An ``AttributeError`` while loading the mail is
        logged as a broken email and the command returns normally. Any
        exception from ``parse_mail`` other than ``DuplicateMailError`` is
        logged and terminates the process with exit status 1.
        """
        infile = args[0] if args else options['infile']

        try:
            if infile:
                logger.info('Parsing mail loaded by filename')
                if six.PY3:
                    with open(infile, 'rb') as file_:
                        mail = email.message_from_binary_file(file_)
                else:
                    with open(infile) as file_:
                        mail = email.message_from_file(file_)
            else:
                logger.info('Parsing mail loaded from stdin')
                if six.PY3:
                    mail = email.message_from_binary_file(sys.stdin.buffer)
                else:
                    mail = email.message_from_file(sys.stdin)
        except AttributeError:
            logger.warning("Broken email ignored")
            return

        # it's important to get exit codes correct here. The key is to allow
        # proper separation of real errors vs expected 'failures'.
        #
        # patch/comment parsed:        0
        # no parseable content found:  0
        # duplicate messages:          0
        # db integrity/other db error: 1
        # broken email (ValueError):   1 (this could be noisy, if it's an issue
        #                                 we could use a different return code)
        try:
            result = parse_mail(mail, options['list_id'])
            if result is None:
                logger.warning('Nothing added to database')
        except DuplicateMailError as exc:
            logger.warning('Duplicate mail for message ID %s', exc.msgid)
        except Exception as exc:
            # Exceptions have no ``.message`` attribute on Python 3; let the
            # logging %s formatting stringify the exception instead so the
            # handler cannot itself fail with AttributeError.
            logger.exception('Error when parsing incoming email: %s',
                             exc,
                             extra={'mail': mail.as_string()})
            sys.exit(1)
    def test_file_with_attachement(self):
        """Deliver a message loaded from ``data/test_attachment.eml`` and
        check that an IMAP RFC822 fetch returns its serialized bytes."""
        here = os.path.dirname(os.path.realpath(__file__))
        eml_path = os.path.join(here, 'data/test_attachment.eml')
        with open(eml_path, mode='br') as eml_file:
            imap_client = yield from self.login_user('user@mail', 'pass', select=True)
            mail = Mail(email.message_from_binary_file(eml_file))

            self.imapserver.receive(mail, imap_user='******')

            status, payload = yield from imap_client.fetch('1', '(RFC822)')

            self.assertEqual('OK', status)
            expected = ['1 FETCH (RFC822 {418898}', mail.as_bytes(), ')', 'FETCH completed.']
            self.assertEqual(expected, payload)
Ejemplo n.º 47
0
    def _parse_email_fixture(self, fixture_path: str) -> Message:
        """Load the email fixture at *fixture_path* and return the message.

        Paths ending in ``.json`` are delegated to
        ``_parse_email_json_fixture``; anything else is parsed as a raw
        email file. A missing fixture prints a notice and exits with
        status 1.
        """
        if not self._does_fixture_path_exist(fixture_path):
            print('Fixture {} does not exist'.format(fixture_path))
            exit(1)

        if not fixture_path.endswith('.json'):
            with open(fixture_path, "rb") as fixture_file:
                return email.message_from_binary_file(fixture_file)

        return self._parse_email_json_fixture(fixture_path)
    def import_package_news(self, package_directory_name):
        """Bulk-create ``News`` rows from the email files of one package.

        The package name is the base name of *package_directory_name*; the
        emails are read, in sorted order, from its ``news`` sub-directory.
        Nothing is imported when the package is unknown or has no news
        directory. A file that fails to import is reported with a traceback
        and skipped.
        """
        package_name = os.path.basename(package_directory_name)
        self.write('Processing package {pkg}...'.format(pkg=package_name))

        try:
            package = PackageName.objects.get(name=package_name)
        except PackageName.DoesNotExist:
            self.write('Package does not exist. Skipping messages...')
            return

        news_directory = os.path.join(package_directory_name, 'news')
        if not os.path.exists(news_directory):
            self.write("Package has no news directory.")
            return

        pending_news = []
        for file_name in sorted(os.listdir(news_directory)):
            news_file_path = os.path.join(news_directory, file_name)

            try:
                with open(news_file_path, 'rb') as news_fp:
                    # Fall back to the text parser where the binary one
                    # is not available.
                    if not hasattr(email, 'message_from_binary_file'):
                        msg = email.message_from_file(news_fp)
                    else:
                        msg = email.message_from_binary_file(news_fp)

                if 'Date' not in msg:
                    created = timezone.now()
                else:
                    parsed_date = email.utils.parsedate_tz(msg['Date'])
                    epoch_seconds = email.utils.mktime_tz(parsed_date)
                    created = timezone.make_aware(
                        datetime.utcfromtimestamp(epoch_seconds),
                        timezone.utc)

                news_kwargs = EmailNews.get_email_news_parameters(msg)
                file_content = news_kwargs.pop('file_content')
                news_kwargs['news_file'] = ContentFile(file_content,
                                                       name='news-file')

                news_item = News(
                    package=package,
                    datetime_created=created,
                    **news_kwargs)
                pending_news.append(news_item)
            except Exception:
                import traceback
                traceback.print_exc()
                self.write('Problem importing news {}'.format(news_file_path))

        self.write("All news for the package processed. "
                   "Bulk creating the instances.")
        News.objects.bulk_create(pending_news)

        self.write('Complete.')
Ejemplo n.º 49
0
    def get_message_parts(self):
        """Output like notmuch show

        Parse the file named by ``self.get_filename()`` and return a list
        with every non-multipart part of the message, in ``walk()`` order.
        """
        if sys.version_info[0] < 3:
            parse = email.message_from_file
        else:
            parse = email.message_from_binary_file
        # The context manager closes the file even when parsing raises.
        with open(self.get_filename(), 'rb') as fp:
            email_msg = parse(fp)

        return [part for part in email_msg.walk() if not part.is_multipart()]
Ejemplo n.º 50
0
def read_mail(filename, project=None):
    """Read a mail from a file below ``TEST_MAIL_DIR``.

    A generated ``Message-Id`` header is added when the mail has none,
    and a ``List-Id`` header set to ``project.listid`` is added when a
    project is given.
    """
    file_path = os.path.join(TEST_MAIL_DIR, filename)
    if not six.PY3:
        with open(file_path) as text_file:
            mail = email.message_from_file(text_file)
    else:
        with open(file_path, 'rb') as binary_file:
            mail = email.message_from_binary_file(binary_file)

    if 'Message-Id' not in mail:
        mail['Message-Id'] = make_msgid()
    if project:
        mail['List-Id'] = project.listid
    return mail
Ejemplo n.º 51
0
    def mail(self):
        """Return the parsed email for ``self._msg``, parsing it on first use.

        The result is cached in ``self._mail``. The binary parser is used
        when the ``email`` package provides it; otherwise the file is read
        as text (as UTF-8 with replacement on Python 3.1).
        """
        if self._mail is None:
            filename = self._msg.get_filename()
            if hasattr(email, 'message_from_binary_file'):
                with open(filename, 'br') as fp:
                    self._mail = email.message_from_binary_file(fp)
            else:
                if (3, 1) <= sys.version_info < (3, 2):
                    fp = codecs.open(filename,
                                     'r', 'utf-8', errors='replace')
                else:
                    fp = open(filename)
                # Parse for BOTH fallback branches; previously the parse
                # call sat inside the inner ``else`` so the 3.1 path left
                # ``self._mail`` as None.
                with fp:
                    self._mail = email.message_from_file(fp)

        return self._mail
Ejemplo n.º 52
0
def m(increment: ('prev', 'show', 'next'),
      thing = None, *,
      showproc = None, showmimeproc = None,
      nocheckmime: bool = False,
      noheader: bool = False,
      draft: bool = False,
      maildir = None):
    '''
    Show messages

    :param increment: Previous, current, or next message
    :param thing: Folder (+folder) or message (message-id) 
        to display, defaults current message in current folder
    :param showproc: Program to display text messages, in case
        you don't want to use the default
    :param showmimeproc: Program to display MIME messages,
        in case you don't want to use the default
    :param nocheckmime: Don't check for MIME messages
    :param noheader: Don't display the header
    :param draft: I don't understand this one.
    '''
    configuration = db.read_configuration()
    # An explicit maildir argument wins over the configured one.
    if not maildir:
        maildir = configuration['maildir']

    mhdir = db.MHDir(maildir)
    if thing:
        # A leading '+' selects a folder; anything else names a message.
        if not thing.startswith('+'):
            mhdir.message = thing
        else:
            mhdir.folder = thing[1:]

    if not thing and not mhdir.has_current_folder():
        sys.stderr.write('''Set the current folder, like this.

  m show +INBOX

''')
        exit(1)

    if not mhdir.message:
        print('No messages')
        return

    if increment in ('prev', 'next'):
        neighbours = db.prev_cur_next(mhdir.message)
        mhdir.message = neighbours[increment]
    with mhdir.message.open('rb') as message_file:
        message = message_from_binary_file(message_file)
    print(parse.body(message))
Ejemplo n.º 53
0
def gauge_email_sentiment(file, input_dir, positives, negatives, header, analyzer):
    """Score one stored email and write a copy into a sentiment directory.

    The message ``input_dir/file`` is parsed, its subject and plain-text
    body are scored with ``analyzer.polarity_scores``, the score dict is
    recorded (as a string) in a new *header* and the message is written
    under the same file name to *negatives* when the ``compound`` score is
    below zero, otherwise to *positives*.
    """
    with open(os.path.join(input_dir, file), 'rb') as email_fp:
        msg = email.message_from_binary_file(email_fp, policy=default)
    # preferencelist is a sequence of subtypes; ('plain') is just the
    # string 'plain', which only worked through substring matching.
    msg_body = msg.get_body(preferencelist=('plain',))
    # A message without a text/plain body is scored on its subject alone
    # instead of failing on ``None.get_content()``.
    body_text = msg_body.get_content() if msg_body is not None else ''
    # We include the subject in scoring the message
    score = analyzer.polarity_scores("{} {}".format(msg['subject'], body_text))
    # Put the scores in the envelope for later use
    msg.add_header(header, str(score))
    # Decide where to put the message
    output_path = negatives if score['compound'] < 0 else positives
    # Write the message out, using the same filename
    with open(os.path.join(output_path, file), "wb") as out_fp:
        out_fp.write(msg.as_bytes())
Ejemplo n.º 54
0
    def parse(self):
        """Parse ``self.raw_message`` according to ``self.message_type``.

        Supported types are ``'string'``, ``'bytes'`` and ``'binary_file'``.
        Any failure while parsing, including an unsupported type, raises
        ``ParseEmailException``; a parsed message that reports defects
        raises ``DefectMessageException``. The returned message carries an
        ``original_size`` attribute and an ``X-MailFrom`` header.
        """
        parser_names = {
            'string': 'message_from_string',
            'bytes': 'message_from_bytes',
            'binary_file': 'message_from_binary_file',
        }
        try:
            if self.message_type not in parser_names:
                raise ValueError('Invalid message_type, could not parse message.')
            # Resolve the parser lazily so only the one needed is looked up.
            parser = getattr(email, parser_names[self.message_type])
            msg = parser(self.raw_message, Message)
        except Exception:
            raise ParseEmailException

        # Do basic post-processing of the message, checking it for defects or
        # other missing information.
        if msg.defects:
            raise DefectMessageException

        # Add headers used in Holonet
        msg.original_size = len(self.raw_message)
        msg['X-MailFrom'] = self.mail_from

        return msg
Ejemplo n.º 55
0
    def smart_parser(input):
        """
        Use the appropriate parser and return a email.message.Message object
        (this is not a L{PyzMessage} object)

        @type input: string, file, bytes, binary_file or  email.message.Message
        @param input: the source of the message
        @rtype: email.message.Message
        @returns: the message
        @raise ValueError: if input is none of the supported types
        """
        if isinstance(input, email.message.Message):
            return input

        if sys.version_info < (3, 0):
            # python 2.x
            if isinstance(input, basestring):
                return email.message_from_string(input)
            elif hasattr(input, 'read') and hasattr(input, 'readline'):
                return email.message_from_file(input)
            else:
                # call syntax: the "raise E, msg" form does not compile on
                # python 3
                raise ValueError('input must be a string, a file or a Message')
        else:
            # python 3.x
            if isinstance(input, str):
                return email.message_from_string(input)
            elif isinstance(input, bytes):
                # python >= 3.2 only
                return email.message_from_bytes(input)
            elif hasattr(input, 'read') and hasattr(input, 'readline'):
                # text streams expose an ``encoding`` attribute, binary
                # streams do not
                if hasattr(input, 'encoding'):
                    # python >= 3.2 only
                    return email.message_from_file(input)
                else:
                    return email.message_from_binary_file(input)
            else:
                raise ValueError(
                    'input must be a string a bytes, a file or a Message')
Ejemplo n.º 56
0
    def test_write_mime_filter(self):
        '''write_mime() with key filters.'''

        pr = problem_report.ProblemReport(date='now!')
        pr['GoodText'] = 'Hi'
        pr['BadText'] = 'YouDontSeeMe'
        pr['GoodBin'] = bin_data
        pr['BadBin'] = 'Y' + '\x05' * 10 + '-'
        io = BytesIO()
        pr.write_mime(io, skip_keys=['BadText', 'BadBin'])
        io.seek(0)

        msg = email.message_from_binary_file(io)
        parts = [p for p in msg.walk()]
        self.assertEqual(len(parts), 3)

        # first part is the multipart container
        self.assertTrue(parts[0].is_multipart())

        # second part should be an inline text/plain attachments with all short
        # fields
        self.assertTrue(not parts[1].is_multipart())
        self.assertEqual(parts[1].get_content_type(), 'text/plain')
        self.assertEqual(parts[1].get_content_charset(), 'utf-8')
        self.assertEqual(parts[1].get_filename(), None)
        self.assertEqual(parts[1].get_payload(decode=True), b'''ProblemType: Crash
Date: now!
GoodText: Hi
''')

        # third part should be the GoodBin: field as attachment
        self.assertTrue(not parts[2].is_multipart())
        f = tempfile.TemporaryFile()
        f.write(parts[2].get_payload(decode=True))
        f.seek(0)
        self.assertEqual(gzip.GzipFile(mode='rb', fileobj=f).read(), bin_data)
        f.close()
Ejemplo n.º 57
0
Archivo: utils.py Proyecto: 3mdeb/afew
def extract_mail_body(message):
    r'''
    Extract the plain text body of the message with signatures
    stripped off.

    Every ``text/plain`` part is decoded with its declared charset
    (falling back to the interpreter's default encoding when the charset
    is missing or unknown), passed line-wise through
    ``strip_signatures`` and the results are joined with newlines.

    :param message: the message to extract the body from
    :type  message: :class:`notmuch.Message`
    :returns: the extracted text body
    :rtype:   :class:`str`
    '''
    filename = message.get_filename()
    if hasattr(email, 'message_from_binary_file'):
        with open(filename, 'br') as fp:
            mail = email.message_from_binary_file(fp)
    else:
        if (3, 1) <= sys.version_info < (3, 2):
            fp = codecs.open(filename, 'r', 'utf-8', errors='replace')
        else:
            fp = open(filename)
        # Close the handle once parsed instead of leaking it.
        with fp:
            mail = email.message_from_file(fp)

    fallback_encoding = sys.getdefaultencoding()
    content = []
    for part in mail.walk():
        if part.get_content_type() != 'text/plain':
            continue
        raw_payload = part.get_payload(decode=True)
        encoding = part.get_content_charset() or fallback_encoding
        try:
            text = raw_payload.decode(encoding, 'replace')
        except LookupError:
            # The declared charset is not known to Python.
            text = raw_payload.decode(fallback_encoding, 'replace')

        lines = strip_signatures(text.split('\n'))
        content.append('\n'.join(lines))
    return '\n'.join(content)
Ejemplo n.º 58
0
def main():
    """Unpack the parts of a MIME message file into a directory.

    Command line: ``-d/--directory DIR msgfile``. Every non-multipart
    part is written to DIR, under its own (sanitized) file name when it
    declares one and under a generated ``part-NNN.ext`` name otherwise.
    """
    parser = ArgumentParser(description="""\
Unpack a MIME message into a directory of files.
""")
    parser.add_argument('-d', '--directory', required=True,
                        help="""Unpack the MIME message into the named
                        directory, which will be created if it doesn't already
                        exist.""")
    parser.add_argument('msgfile')
    args = parser.parse_args()

    with open(args.msgfile, 'rb') as fp:
        msg = email.message_from_binary_file(fp, policy=default)

    try:
        os.mkdir(args.directory)
    except FileExistsError:
        pass

    counter = 1
    for part in msg.walk():
        # multipart/* are just containers
        if part.get_content_maintype() == 'multipart':
            continue
        filename = part.get_filename()
        if filename:
            # The name comes from the (untrusted) message: keep only its
            # final path component so it cannot point outside the target
            # directory. Backslashes are treated as separators as well.
            filename = os.path.basename(filename.replace('\\', '/'))
        if not filename or filename in ('.', '..'):
            ext = mimetypes.guess_extension(part.get_content_type())
            if not ext:
                # Use a generic bag-of-bits extension
                ext = '.bin'
            filename = 'part-%03d%s' % (counter, ext)
        counter += 1
        with open(os.path.join(args.directory, filename), 'wb') as fp:
            fp.write(part.get_payload(decode=True))
Ejemplo n.º 59
0
#!/usr/bin/python3
"""Print "address<TAB>real name" for each To/From/Cc address of a mail on stdin."""
import sys
import email
import email.utils


def extract_addresses(message, headers=('to', 'from', 'cc')):
    """Return ``(realname, address)`` pairs for every address in *headers*.

    ``getaddresses`` is used so that a header listing several recipients
    yields one pair per recipient, not just the first one.
    """
    pairs = []
    for hdr in headers:
        pairs.extend(email.utils.getaddresses(message.get_all(hdr, [])))
    return pairs


def main():
    """Read a message from standard input and print its addresses."""
    message = email.message_from_binary_file(sys.stdin.buffer)
    for realname, addr in extract_addresses(message):
        # Tabs separate the output columns, so they may not occur in a name.
        print('%s\t%s' % (addr, realname.replace('\t', ' ')))


if __name__ == '__main__':
    main()