Ejemplo n.º 1
0
    def read(self, source_path):
        """
        Read a redirect file and return ``(content, metadata)``.

        The file is parsed as an RFC 822-style message: the headers supply
        the metadata and the stripped payload becomes the content.  A
        non-empty ``location`` header is mandatory.  ``delay`` defaults to 0
        and is converted to ``float``, ``title`` defaults to an empty string
        and ``status`` defaults to ``'hidden'``.

        Raises ValueError when ``location`` is missing or empty, or when
        ``delay`` cannot be converted to a float.
        """
        with pelican_open(source_path) as source:
            message = email.parser.Parser().parsestr(source)

        location = message.get('location')
        if not location:
            raise ValueError(
                u"RedirectReader requires a 'location' header in the file")

        metadata = {
            'title': message.get('title', u''),
            'location': location,
            'delay': float(message.get('delay', 0)),
            'status': message.get('status', 'hidden'),
        }

        # The slug matters because Pelican's slugification affects the final
        # URL, and redirects need exact URLs.  An explicit ``slug`` header
        # wins; otherwise the file's base name (without its extension) is
        # used, on the assumption that redirect files are named carefully.
        slug = (message.get('slug')
                or os.path.splitext(os.path.basename(source_path))[0])
        if slug:
            metadata['slug'] = slug

        return message.get_payload().strip(), metadata
Ejemplo n.º 2
0
def extract_messages(path, addressbook, outboxes, verbose):
    """Collect the unique e-mails found under ``path``, oldest first.

    Every file below ``path`` is read as Latin-1 text and parsed as a
    message.  A message is kept only when its ``From`` header is a key of
    ``addressbook`` and at least one whitespace-separated entry of its
    ``To``/``Cc``/``Bcc`` headers is a key as well; the kept recipients are
    replaced by their ``addressbook`` values.

    When ``outboxes`` is true, only directories whose path ends in
    ``sent_items``, ``sent`` or ``sent_mail`` are read.  When ``verbose`` is
    true, every directory that is read is printed.

    Returns a list of ``Message`` objects sorted by their ``timestamp``.
    """
    parser = email.parser.Parser()
    outbox_patterns = [re.compile(pattern)
                       for pattern in ['sent_items$', 'sent$', 'sent_mail$']]
    # A set keeps identical messages from being recorded twice.
    unique = set()

    for directory, _, filenames in os.walk(path):
        if outboxes:
            in_outbox = any(p.search(directory) for p in outbox_patterns)
            if not in_outbox:
                continue
        if verbose:
            print(directory)

        for filename in filenames:
            full_path = os.path.join(directory, filename)
            with codecs.open(full_path, 'r', 'Latin-1') as handle:
                message = parser.parsestr(handle.read())

            sender = message['From']
            if sender not in addressbook:
                continue

            recipients = []
            for header in ('To', 'Cc', 'Bcc'):
                value = message[header]
                if value is not None:
                    recipients.extend(m.strip(',') for m in value.split())

            # Only recipients listed in the address book count.
            known = tuple(addressbook[r] for r in recipients
                          if r in addressbook)
            if not known:
                continue
            unique.add(Message(sender, known,
                               dateutil.parser.parse(message['Date'])))

    return sorted(unique, key=lambda m: m.timestamp)
Ejemplo n.º 3
0
def get_parts(response):
    """Split a multipart HTTP response into its parts.

    The response headers and body are joined into one MIME document and
    parsed with ``email.parser``.

    Params:
        response: a response object exposing ``raw.getheaders()`` (a
            mapping of header names to values) and ``content``
    Returns:
        a dict mapping each part's Content-ID (angle brackets removed) to
        its payload.  The root part -- the one named by the ``start``
        parameter of the Content-Type header, or the first part when that
        parameter is absent -- is stored under ``'start'``.  Parts without
        a Content-ID are stored under ``'Attachment<index>'``.
    """
    head_lines = ''.join(
        str(k) + ':' + str(v) + '\n'
        for k, v in response.raw.getheaders().items())

    body = response.content
    if isinstance(body, bytes) and not isinstance(body, str):
        # Python 3 delivers the body as bytes.  Latin-1 maps every byte to
        # exactly one character, so nothing is lost before parsing.
        body = body.decode('latin-1')

    decoded_reply = email.parser.Parser().parsestr(head_lines + body)

    # get_param() returns None when there is no ``start`` parameter; fall
    # back to '' so the "first part is the root" rule below can apply.
    start = (decoded_reply.get_param('start') or '').lstrip('<').rstrip('>')

    parts = {}
    for i, part in enumerate(decoded_reply.get_payload()):
        cid = part.get('content-Id', '').lstrip('<').rstrip('>')
        if (not start or start == cid) and 'start' not in parts:
            parts['start'] = part.get_payload()
        else:
            parts[cid or 'Attachment%d' % i] = part.get_payload()
    return parts
Ejemplo n.º 4
0
    def __init__(self, config, body, rcpts, parse_rcpts=False, efrom=None):
        """Create a new message to the given recipients containing the text
        given in `body`.  If no envelope sender address is passed in `efrom`,
        it is guessed from the message body.  If `parse_rcpts` is true, the
        message body is parsed for additional recipients.

        When the message has no ``From`` header, the ``default_from``
        setting from `config` is used if it is set; otherwise the envelope
        sender is used."""
        self.rcpts = set()
        for rcpt in rcpts:
            _name, addr = email.utils.parseaddr(rcpt)
            if addr:
                self.rcpts.add(addr)

        parser = email.parser.Parser()
        # Only the headers are parsed; the rest stays one opaque payload.
        self.message = parser.parsestr(self.received() + body,
                                       headersonly=True)

        if efrom is None:
            # ``in`` replaces Message.has_key(), which Python 3 removed.
            if "from" not in self.message:
                default_from = config.get_general(str, "default_from")
                if default_from:
                    self.message["From"] = default_from
            self.efrom = self.guess_envelope_from()
        else:
            self.efrom = efrom

        if "from" not in self.message:
            self.message["From"] = self.efrom

        if parse_rcpts:
            self.add_recipient_addresses()

        self.fix_headers()
Ejemplo n.º 5
0
def fetchmailbody(mail_detail, result):
    """Fetch one body section of a message and summarise it in ``result``.

    Args:
        mail_detail: mapping providing the message ``uid`` and the
            ``section_id`` of the body part to fetch.
        result: dict that is filled in place.  ``result['error']`` is set
            to 0 and ``result['data']`` receives ``body``, ``is_html``,
            ``from``, ``to``, ``time``, ``subject`` and, when the headers
            are present, ``cc`` and ``bcc``.  ``time`` is None when the
            Date header is missing or cannot be parsed.

    Uses the module-level ``server`` object to talk to the mail server
    (presumably an IMAP client -- confirm against the rest of the file).
    """
    result['error'] = 0
    result['data'] = {}
    section = mail_detail['section_id']
    body_section = 'BODY.PEEK[' + section + ']'
    body_field = 'BODY[' + section + ']'
    body_mime = 'BODY[' + section + '.MIME]'
    response = server.fetch([mail_detail[u'uid']],
                            [body_section, 'RFC822.HEADER', body_mime])
    result['data']['body'] = ''
    result['data']['is_html'] = 0
    parser = email.parser.HeaderParser()

    for msgid, data in response.items():
        # Glue the part's MIME headers to its body so it parses as a message.
        email_eml = (data[str(body_mime)].encode('utf-8', 'replace')
                     + data[str(body_field)].encode('utf-8', 'replace'))
        mail = email.message_from_string(email_eml)
        for part in mail.walk():
            payload = part.get_payload(decode=True)
            if payload is None:
                # Multipart containers have no payload of their own.
                continue
            # Prefer the declared charset, then a guess, then UTF-8 so that
            # a failed guess cannot produce the bogus codec name 'None'.
            charset = (part.get_content_charset()
                       or chardet.detect(str(part))['encoding']
                       or 'utf-8')
            result['data']['body'] = unicode(
                payload, str(charset), "ignore").encode('utf8', 'replace')
            content_type = part.get_content_type()
            if content_type == 'text/html':
                result['data']['is_html'] = 1
            elif content_type == 'text/plain':
                result['data']['is_html'] = 0

        headers = parser.parsestr(
            data[u'RFC822.HEADER'].encode('utf-8', 'replace'))
        # Missing headers come back as None; treat them as empty strings.
        result['data']['from'] = (headers['FROM'] or '').encode('utf-8', 'replace')
        result['data']['to'] = (headers['TO'] or '').encode('utf-8', 'replace')
        if headers['CC']:
            result['data']['cc'] = headers['CC'].encode('utf-8', 'replace')
        if headers['BCC']:
            result['data']['bcc'] = headers['BCC'].encode('utf-8', 'replace')
        date_header = headers['DATE']
        parsed_date = email.utils.parsedate(date_header) if date_header else None
        result['data']['time'] = (int(time.mktime(parsed_date))
                                  if parsed_date else None)
        result['data']['subject'] = (headers['SUBJECT'] or '').encode('utf-8', 'replace')
Ejemplo n.º 6
0
    async def parse_headers(cls, reader):
        '''
        Read an HTTP header block from `reader` and parse it.

        Lines are awaited from ``reader.readline()`` until a blank line
        (``\\r\\n`` or ``\\n``) or end of input (``b''``) is seen.  This is a
        port of the header parsing code in the Python standard library,
        modified to use asyncio.
        https://github.com/python/cpython/blob/3.6/Lib/http/client.py

        :return: an ``http.client.HTTPMessage`` holding the parsed headers.
        :raises: ValueError if a line longer than MAXLINE characters is
          discovered.
        :raises: ValueError if more than MAXHEADERS headers are
          discovered.
        '''
        terminators = (b'\r\n', b'\n', b'')
        raw_lines = []
        current = None
        while current not in terminators:
            current = await reader.readline()
            if len(current) > cls.MAXLINE:
                raise ValueError('Line too long while parsing header')
            # The terminating line is kept too, as in the stdlib original.
            raw_lines.append(current)
            if len(raw_lines) > cls.MAXHEADERS:
                raise ValueError('Too many headers found while parsing')

        text = b''.join(raw_lines).decode('iso-8859-1')
        return email.parser.Parser(_class=http.client.HTTPMessage).parsestr(text)
Ejemplo n.º 7
0
def extract(mailString):
    """Report the mail client and pass the message body to ``extractUtility``.

    Prints the ``X-Mailer`` header, or ``NOT_FOUND`` when it is absent.
    For a multipart message only the first sub-part is passed on (with the
    flag ``True``); otherwise the whole payload is passed on (with the flag
    ``False``).  The parsed headers are passed along in both cases.
    """
    headers = email.parser.HeaderParser().parsestr(mailString)

    mailer = headers['X-Mailer']
    # A parenthesised single-argument print behaves the same on Python 2
    # and Python 3.
    if mailer is None:
        print("CLIENT USED : NOT_FOUND")
    else:
        print("CLIENT USED : " + str(mailer))

    message = email.message_from_string(mailString)

    if message.is_multipart():
        sub_parts = message.get_payload()
        if sub_parts:
            extractUtility(sub_parts[0], headers, True)
    else:
        extractUtility(message.get_payload(), headers, False)
Ejemplo n.º 8
0
def _collect_headers(strings):
    headers, parser = {}, email.parser.Parser()

    for string in strings:
        headers.update(dict(parser.parsestr(string)))

    return headers
Ejemplo n.º 9
0
def writeMessage(string, yearlims=(1970, 2020)):
    """Parse one raw message and append it to the catalog and input files.

    The headers become a metadata record written as one JSON line to the
    global ``catalog``.  The payload, with newlines and tabs replaced by
    spaces, is written to the global ``input`` after the record's numeric
    id.  The global counter ``id`` is incremented on every call.

    ``Path`` is split on ``!`` and ``Newsgroups`` on ``,``.  The fields
    returned by ``emailName(From).elements()`` are merged into the record.
    A parseable ``Date`` header is also stored in ISO format under
    ``date``.

    ``yearlims`` is kept for backward compatibility.  The previous range
    check only computed a local value that was never used, so the argument
    has no effect on the output.
    """
    global id
    global parser
    global catalog
    global input
    parsed = parser.parsestr(string)
    metadata = dict(parsed)

    # Turn the list-like headers into real lists.
    for field, separator in (("Path", "!"), ("Newsgroups", ",")):
        if field in metadata:
            try:
                metadata[field] = metadata[field].split(separator)
            except AttributeError:
                # Value is not a plain string; keep it unchanged.
                pass

    if "From" in metadata:
        sender = emailName(metadata["From"])
        sender_fields = sender.elements()
        for key in sender_fields.keys():
            metadata[key] = sender_fields[key]

    try:
        metadata["date"] = dateutil.parser.parse(metadata["Date"]).isoformat()
    except Exception:
        # Missing or unparseable Date header: leave "date" out.
        pass

    id += 1
    metadata["filename"] = str(id)

    catalog.write(json.dumps(metadata) + "\n")
    flat_body = parsed.get_payload().replace("\n", " ").replace("\t", " ")
    input.write(str(id) + "\t" + flat_body + "\n")
Ejemplo n.º 10
0
    def _process_mail(self, mailbox, uid, flags, idate, msg):
        """Process the attachments (if any) on an individual mail"""
        parser = email.parser.Parser()
        mail = parser.parsestr(msg)
        found_attachment = False
        doc_id = None

        if 'message-id' not in mail:
            mail['message-id'] = "*****@*****.**" % hashlib.sha1(repr(mail._headers)).hexdigest()
            logging.warning(" mail %s: no Message-ID, using fake-id %s", uid, mail['message-id'])

        logging.debug("Message-ID: %s", mail['message-id'])

        # quick first pass to see if we have an attachment
        for part in mail.walk():
            if self._part_is_attachment(part):
                found_attachment = True
                break

        if not found_attachment:
            logging.debug("No attachments --> skip (%d bytes)" % len(str(mail)))
            return

        if self.db is not None:
            doc_id = self._save_mail_to_db(mailbox, mail)
        if self.remove:
            self._remove_attachments(mail, doc_id, mailbox, uid, flags, idate)
Ejemplo n.º 11
0
def get_parts(response):
    """Split a multipart HTTP response into its parts.

    The response headers and body are joined into one MIME document and
    parsed with ``email.parser``.

    Params:
        response: a response object exposing ``raw.getheaders()`` (a
            mapping of header names to values) and ``content``
    Returns:
        a dict mapping each part's Content-ID (angle brackets removed) to
        its payload.  The root part -- the one named by the ``start``
        parameter of the Content-Type header, or the first part when that
        parameter is absent -- is stored under ``'start'``.  Parts without
        a Content-ID are stored under ``'Attachment<index>'``.
    """
    head_lines = ''.join(
        str(k) + ':' + str(v) + '\n'
        for k, v in response.raw.getheaders().items())

    content = response.content
    if isinstance(content, bytes) and not isinstance(content, str):
        # On Python 3 str(bytes) would give the "b'...'" repr, not the
        # text.  Latin-1 maps every byte to exactly one character.
        content = content.decode('latin-1')
    else:
        content = str(content)

    decoded_reply = email.parser.Parser().parsestr(head_lines + content)

    # get_param() returns None when there is no ``start`` parameter; fall
    # back to '' so the "first part is the root" rule below can apply.
    start = (decoded_reply.get_param('start') or '').lstrip('<').rstrip('>')

    parts = {}
    for i, part in enumerate(decoded_reply.get_payload()):
        cid = part.get('content-Id', '').lstrip('<').rstrip('>')
        if (not start or start == cid) and 'start' not in parts:
            parts['start'] = part.get_payload()
        else:
            parts[cid or 'Attachment%d' % i] = part.get_payload()
    return parts
Ejemplo n.º 12
0
    def _parse_message(self, file_path, data, headersonly=False, clean=True):
        if headersonly:
            parser = email.parser.HeaderParser()
        else:
            parser = email.parser.Parser()
        message = parser.parsestr(data, headersonly=headersonly)

        xmailfile = message['X-Mailfile'].strip()
        if xmailfile[:1] == '!':
            xmailfile = self.config.fernet.decrypt(xmailfile[1:])
        else:
            xmailfile = base64.b64decode(xmailfile)
        metadata = json.loads(xmailfile)

        if file_path and metadata['fn'] != file_path:
            raise IOError('File path mismatch: %s' % metadata['fn'])

        if clean:
            _clean_metadata(metadata)

        if headersonly:
            return metadata

        for part in message.walk():
            if part.get_content_type() == 'application/x-mailfile':
                contents = part.get_payload()
                if contents[:1] == '!':
                    contents = self.config.fernet.decrypt(contents[1:])
                else:
                    contents = base64.b64decode(contents)
                return metadata, contents[:metadata['bytes']]

        raise OSError('No data in message, %s is corrupt?' %
                      (file_path or 'file'))
Ejemplo n.º 13
0
    def __init__(self, config, body, rcpts, parse_rcpts=False, efrom=None):
        """Create a new message to the given recipients containing the text
        given in `body`.  If no envelope sender address is passed in `efrom`,
        it is guessed from the message body.  If `parse_rcpts` is true, the
        message body is parsed for additional recipients.

        When the message has no ``From`` header, the ``default_from``
        setting from `config` is used if it is set; otherwise the envelope
        sender is used."""
        # Keep only recipients from which an address can be extracted.
        parsed_rcpts = (email.utils.parseaddr(rcpt) for rcpt in rcpts)
        self.rcpts = set(addr for _name, addr in parsed_rcpts if addr)

        # Only the headers are parsed; the rest stays one opaque payload.
        self.message = email.parser.Parser().parsestr(
            self.received() + body, headersonly=True)

        if efrom is not None:
            self.efrom = efrom
        else:
            # ``in`` replaces Message.has_key(), which Python 3 removed.
            if "from" not in self.message:
                default_from = config.get_general(str, "default_from")
                if default_from:
                    self.message["From"] = default_from
            self.efrom = self.guess_envelope_from()

        if "from" not in self.message:
            self.message["From"] = self.efrom

        if parse_rcpts:
            self.add_recipient_addresses()

        self.fix_headers()
Ejemplo n.º 14
0
def handle(to, sender, body):
    """Parse a raw mail and hand it, with its attachments, to ``storage``.

    The stored record holds the recipient, sender, subject, the local time
    of receipt, the concatenated (undecoded) text of all text parts that
    have no filename, and one entry per part that has a filename.  Each
    attachment payload is stored through ``storage.store_attachment`` and
    referenced by the id that call returns.
    """
    mail = email.parser.Parser().parsestr(body)
    message = {
        'to': to,
        'sender': unicode(sender),
        'subject': mail['subject'],
        'received': time.strftime("%Y-%m-%d %H:%M:%S"),
        'content': "",
        'attachments': [],
    }

    for part in mail.walk():
        maintype = part.get_content_maintype()
        if maintype == "multipart":
            # Containers hold no content of their own.
            continue

        filename = part.get_filename()
        if filename:
            content_type = part.get_content_type()
            decoded = part.get_payload(decode=True)
            message['attachments'].append({
                'filename': filename,
                'type': content_type,
                'payload-id': storage.store_attachment(decoded, content_type),
            })
        elif maintype == "text":
            message['content'] += part.get_payload(decode=False)

    storage.store_mail(message)
Ejemplo n.º 15
0
def getMessagesForCriteria(M, sender=None, subject=None):
    """Return the messages on connection ``M`` that match the criteria.

    ``sender`` and ``subject`` are turned into a search string by
    ``composeSearchString``.  Every matching message is fetched in full
    (``RFC822``) and parsed into an ``email.message.Message``.  The search
    string and the number of matches are printed.
    """
    searchString = composeSearchString(sender, subject)
    # A parenthesised single-argument print behaves the same on Python 2
    # and Python 3.
    print("search string: %s" % searchString)

    resp, data = M.search(None, searchString)
    message_ids = data[0].split()
    print("# matching messages: %s" % str(len(message_ids)))

    parser = email.parser.Parser()
    returnMessages = []
    for m_id in message_ids:
        resp, data = M.fetch(m_id, "(RFC822)")
        raw = data[0][1]
        if isinstance(raw, bytes) and not isinstance(raw, str):
            # Python 3 connections return bytes, which parsestr() rejects.
            returnMessages.append(email.message_from_bytes(raw))
        else:
            returnMessages.append(parser.parsestr(raw))

    return returnMessages
Ejemplo n.º 16
0
def _collect_headers(strings):
    headers, parser = {}, email.parser.Parser()

    for string in strings:
        headers.update(dict(parser.parsestr(string)))

    return headers
Ejemplo n.º 17
0
    def test_create_mailing_from_message(self):

        parser = email.parser.Parser()
        msg = parser.parsestr("""Content-Transfer-Encoding: 7bit
Content-Type: multipart/alternative; boundary="===============2840728917476054151=="
Subject: Great news!
From: Mailing Sender <*****@*****.**>
To: <*****@*****.**>
Date: Wed, 05 Jun 2013 06:05:56 -0000

This is a multi-part message in MIME format.
--===============2840728917476054151==
Content-Type: text/plain; charset="windows-1252"
Content-Transfer-Encoding: quoted-printable

This is a very simple mailing. I=92m happy.
--===============2840728917476054151==
Content-Type: text/html; charset="windows-1252"
Content-Transfer-Encoding: quoted-printable

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<html><head>
<META http-equiv=3DContent-Type content=3D"text/html; charset=3Diso-8859-1">
</head>
<body>
This is <strong> a very simple</strong> <u>mailing</u>. =
I=92m happy! Nothing else to say...
</body></html>

--===============2840728917476054151==--
""")
        mailing = Mailing.create_from_message(msg, mail_from='*****@*****.**',
                                                   sender_name='Mailing Sender',
                                                   scheduled_start=None, scheduled_duration=None)

        message = parser.parsestr(mailing.header + mailing.body)
        assert(isinstance(message, email.message.Message))
        self.assertTrue(message.is_multipart())
        self.assertEquals("multipart/alternative", message.get_content_type())
        self.assertIsInstance(message.get_payload(i=0), email.message.Message)
        self.assertEquals("text/plain", message.get_payload(i=0).get_content_type())
        self.assertEquals("windows-1252", message.get_payload(i=0).get_param('charset'))
        self.assertEquals("text/html", message.get_payload(i=1).get_content_type())
        self.assertEquals("windows-1252", message.get_payload(i=1).get_param('charset'))
        self.assertEquals("This is a very simple mailing. I\x92m happy.", message.get_payload(i=0).get_payload(decode=True))
        self.assertIn("This is <strong> a very simple</strong> <u>mailing</u>. I\x92m happy! ", message.get_payload(i=1).get_payload(decode=True))
Ejemplo n.º 18
0
def decode_message(headers, raw_message):
    """Return the text of ``raw_message``, flattening multipart bodies.

    When the ``content-type`` entry of ``headers`` starts with
    ``multipart/``, ``str(headers)`` is prepended to ``raw_message``, the
    result is parsed as a MIME document, and the payloads of its parts are
    concatenated.  Any other content type returns ``raw_message`` unchanged.
    """
    if not headers['content-type'].startswith('multipart/'):
        return raw_message

    document = email.parser.Parser().parsestr(str(headers) + raw_message)
    pieces = [part.get_payload() for part in document.get_payload()]
    return ''.join(pieces)
Ejemplo n.º 19
0
 def listTasks(self, mailbox="INBOX", criterion="(ALL)"):
     """Return one "<From> : <Subject>" line per message matching ``criterion``.

     ``mailbox`` is selected with ``True`` as the second argument, then
     searched with the ``UTF-8`` charset.  For each hit only the Subject and
     From header fields are fetched, and both are passed through
     ``headerUnicode`` before being joined.
     """
     self.select(mailbox, True)
     _typ, msgnums = self.search('UTF-8', criterion)

     header_parser = email.parser.HeaderParser()
     summaries = []
     for num in msgnums[0].split():
         _typ, data = self.fetch(num, '(BODY[HEADER.FIELDS (SUBJECT FROM)])')
         fields = header_parser.parsestr(data[0][1])
         sender = headerUnicode(fields['From'])
         subject = headerUnicode(fields['Subject'])
         summaries.append(sender + " : " + subject)
     return summaries
Ejemplo n.º 20
0
def name_extractor(path):
    """Yield ``(From, X-From)`` header pairs for every file under ``path``.

    The tree is walked recursively and each visited directory is printed.
    Files are read as Latin-1 text and parsed as e-mail messages; a header
    that is absent is yielded as None.
    """
    parser = email.parser.Parser()
    for directory, _, filenames in os.walk(path):
        print(directory)
        for filename in filenames:
            full_path = os.path.join(directory, filename)
            with codecs.open(full_path, 'r', 'Latin-1') as handle:
                raw = handle.read()
            headers = parser.parsestr(raw)
            yield (headers['From'], headers['X-From'])
Ejemplo n.º 21
0
 def getHeaders(self, conn, imapid):
     """
         Fetch the header block of message ``imapid`` over ``conn`` and
         return it parsed.

         Raises RuntimeError when the reply status is not 'OK'.
     """
     status, payload = conn.fetch(imapid, '(BODY[HEADER])')
     if status != 'OK':
         raise RuntimeError('Unvalid reply: ' + status)
     # Turn each element of the raw header data into the character with
     # the same code point.
     text = "".join([chr(code) for code in payload[0][1]])
     return email.parser.HeaderParser().parsestr(text)
Ejemplo n.º 22
0
def name_extractor(path):
    """Generate the ``From`` and ``X-From`` headers of each mail file.

    Walks ``path`` recursively, printing every directory it enters.  Each
    file is decoded as Latin-1 and parsed as an e-mail message; the two
    header values are yielded as a tuple, with None for an absent header.
    """
    parse = email.parser.Parser().parsestr
    for root, _dirs, names in os.walk(path):
        print(root)
        for location in (os.path.join(root, name) for name in names):
            with codecs.open(location, 'r', 'Latin-1') as stream:
                parsed = parse(stream.read())
            yield (parsed['From'], parsed['X-From'])
Ejemplo n.º 23
0
def parse_origin_packages(packages_text):
    """Parse the text of a ``Packages`` index into a list of records.

    Stanzas are separated by blank lines.  Each stanza is parsed as an
    RFC 822-style header block, so a field is read with ``record["Name"]``.

    Runs of blank lines do not produce empty records, and the last stanza
    is included even when the text does not end with a blank line.
    """
    parser = email.parser.Parser()
    packages = []
    stanza = []

    def flush():
        # Parse the lines collected so far, if any, and start a new stanza.
        if stanza:
            packages.append(parser.parsestr("".join(stanza)))
            del stanza[:]

    for line in packages_text.splitlines():
        if line == "":
            flush()
        else:
            stanza.append(line + "\n")
    # The final stanza may not be followed by a blank line.
    flush()
    return packages
Ejemplo n.º 24
0
 def fetch(self, folder='Inbox'):
   """Return ``(id, message)`` pairs for the unseen mails in ``folder``.

   The folder is selected, searched for ``UNSEEN`` messages and closed
   again.  The first element of each pair is the first token of the fetch
   response line and the second is the parsed message.
   """
   # Where do we mark messages as seen?
   self.imap.select(folder)
   status, found = self.imap.search(None, 'UNSEEN')
   uid_set = found[0].replace(' ', ',')
   data = []
   if uid_set:
     status, data = self.imap.fetch(uid_set, '(RFC822)')
   self.imap.close()

   parser = email.parser.Parser()
   messages = []
   # Only every second response item is unpacked as an (envelope, mail) pair.
   for envelope, mail in data[::2]:
     messages.append((envelope.split()[0], parser.parsestr(mail)))
   return messages
Ejemplo n.º 25
0
def bulk_fetch(uids, fields, chunk_size=100, raw_message=False):
    """
    Fetches a bunch of messages and makes them available as a generator.

    ``uids`` are requested in chunks of ``chunk_size`` through the
    module-level ``mail`` connection, asking for ``fields``.  Response
    items whose first element does not contain ``"(UID "`` are skipped.

    Yields ``(uid, message)``.  When ``raw_message`` is true, only the
    headers are parsed and the unparsed text is yielded as a third element.

    Raises Exception when a fetch does not return ``"OK"``.
    """
    parser = email.parser.Parser()
    for uid_chunk, pos in chunker(uids, chunk_size):
        print("Fetching chunk %s" % pos)
        # The status used to be bound to ``type`` but tested as ``typ``,
        # which raised NameError on the first chunk.
        typ, data = mail.uid("fetch", ",".join(uid_chunk), fields)
        if typ != "OK":
            raise Exception("Failed to retrieve messsages")
        # Check each message in this batch
        for msg in data:
            # Parse out the UID as well
            if "(UID " not in msg[0]:
                continue
            uid = msg[0].split(" ", 4)[2]
            if raw_message:
                yield uid, parser.parsestr(msg[1], True), msg[1]
            else:
                yield uid, parser.parsestr(msg[1])
Ejemplo n.º 26
0
def notify(ui, account):
    """Send desktop notifications for the new messages of ``account``.

    When the total number of new messages exceeds ``conf['max']``, a single
    digest notification is sent with one body line per folder.  Otherwise
    one notification is sent per message, formatted from ``conf['summary']``
    and ``conf['body']``.

    The message body is parsed only when one of those formats refers to
    ``{body``.  It is taken from the first ``text/plain`` part, and
    ``conf['failstr']`` is used when there is no such part or it cannot be
    decoded.  Errors in the format specification are reported through
    ``ui.error``.
    """
    encoding = locale.getpreferredencoding(False)
    account_name = account.getname().decode(encoding)
    conf = get_config(ui)
    notify_send = functools.partial(send_notification, ui, conf)
    summary_formatter = MailNotificationFormatter(escape=False,
                                                  failstr=conf['failstr'])
    body_formatter = MailNotificationFormatter(escape=True,
                                               failstr=conf['failstr'])

    count = 0
    body = []
    for folder, contents in ui.new_messages[account].items():
        count += len(contents)
        body.append(
            body_formatter.format(conf['digest-body'],
                                  count=len(contents),
                                  folder=folder))

    if count > conf['max']:
        summary = summary_formatter.format(conf['digest-summary'],
                                           count=count,
                                           account=account_name)
        return notify_send(summary, '\n'.join(body))

    need_body = '{body' in conf['body'] or '{body' in conf['summary']
    parser = email.parser.Parser()
    for folder, contents in ui.new_messages[account].items():
        format_args = {
            'account': account_name,
            'folder': folder.decode(encoding)
        }
        for content in contents:
            message = parser.parsestr(content.get('message'),
                                      headersonly=not need_body)
            format_args['h'] = HeaderDecoder(message, failstr=conf['failstr'])
            if need_body:
                for part in message.walk():
                    if part.get_content_type() == 'text/plain':
                        payload = part.get_payload(decode=True)
                        # A part may declare no charset; US-ASCII is the
                        # MIME default in that case.
                        charset = part.get_content_charset() or 'us-ascii'
                        try:
                            format_args['body'] = payload.decode(charset)
                        except (LookupError, UnicodeError):
                            # Unknown charset or undecodable bytes must not
                            # abort the remaining notifications.
                            format_args['body'] = conf['failstr']
                        break
                else:
                    format_args['body'] = conf['failstr']
            try:
                notify_send(
                    summary_formatter.vformat(conf['summary'], (),
                                              format_args),
                    body_formatter.vformat(conf['body'], (), format_args))
            except (AttributeError, KeyError, TypeError, ValueError) as exc:
                ui.error(exc, msg='In notification format specification')
Ejemplo n.º 27
0
def mboxo_generator(input, parser=email.parser.Parser()):
    '''Yield each message found in ``input`` in ``mboxo`` / ``mboxrd`` format.

    ``input`` is any iterable of text lines, typically an open mbox file.
    A line starting with ``From `` begins a new message and is kept as the
    first line of that message.  An empty string (as opposed to a bare
    newline) is treated as end of input.  The last message is yielded when
    the input is exhausted.

    ``parser`` must provide ``parsestr``; the default is a shared
    ``email.parser.Parser``.
    '''
    data = []
    for line in input:
        if line == '':
            break
        if line[:5] == 'From ' and data:
            yield parser.parsestr(''.join(data))
            data = []
        data.append(line)
    # The final message has no following "From " line to flush it.
    if data:
        yield parser.parsestr(''.join(data))
Ejemplo n.º 28
0
def send_email(content,
               smtp_server = config.email_smtp_server,
               verbose = False):
    """Send the raw message ``content`` through ``smtp_server``.

    The envelope sender and recipient are taken from the message's own
    ``From`` and ``To`` headers via ``_get_address``.  The send is run
    through ``util.call_verbose`` with ``verbose`` passed along.  The SMTP
    session is always closed afterwards, even when sending fails.
    """
    # Get the envelope From and To by parsing the message.  This happens
    # before connecting so that a parsing failure cannot leak a connection.
    parsed_msg = email.parser.Parser().parsestr(content)
    from_addr = _get_address(parsed_msg.get("From"))
    to_addr = _get_address(parsed_msg.get("To"))

    s = smtplib.SMTP(smtp_server)
    try:
        util.call_verbose("Sending email", verbose,
                          s.sendmail,
                          from_addr, to_addr, content)
    finally:
        try:
            s.quit()
        except smtplib.SMTPServerDisconnected:
            # The server already dropped the connection; nothing to close.
            pass
Ejemplo n.º 29
0
    def __init__(self, fromlines=None, fromstring=None, fromfile=None):
        """Build a message from a list of lines, a string or a file object.

        The first truthy argument, checked in the order ``fromlines``,
        ``fromstring``, ``fromfile``, is used.  Input that raises
        ``email.errors.MessageError`` while parsing is wrapped by
        ``corrupt_message``.  The raw text is kept for the ``fromlines``
        and ``fromstring`` forms only.  ``sender`` is taken from the
        Return-Path header, or ``'unknown'`` when that is absent.

        Raises SystemExit when no argument is given.
        """
        self.recipient = None
        self.received_by = None
        self.received_from = None
        self.received_with = None
        self.__raw = None
        parser = email.parser.Parser()

        # Message is instantiated with fromlines for POP3, fromstring for
        # IMAP (both of which can be badly-corrupted or invalid, i.e. spam,
        # MS worms, etc).  It's instantiated with fromfile for the output
        # of filters, etc, which should be saner.
        if fromlines:
            text = os.linesep.join(fromlines)
            try:
                self.__msg = parser.parsestr(text)
            except email.errors.MessageError as o:
                self.__msg = corrupt_message(o, fromlines=fromlines)
            self.__raw = text
        elif fromstring:
            try:
                self.__msg = parser.parsestr(fromstring)
            except email.errors.MessageError as o:
                self.__msg = corrupt_message(o, fromstring=fromstring)
            self.__raw = fromstring
        elif fromfile:
            # fromfile is only used by getmail_maildir, getmail_mbox, and
            # from reading the output of a filter.  __raw stays None here.
            try:
                self.__msg = parser.parse(fromfile)
            except email.errors.MessageError as o:
                # Shouldn't happen
                self.__msg = corrupt_message(o, fromstring=fromfile.read())
        else:
            # Can't happen?
            raise SystemExit('Message() called with wrong arguments')

        return_path = self.__msg['return-path'] or 'unknown'
        self.sender = address_no_brackets(return_path)
Ejemplo n.º 30
0
    def test_create_mailing_from_message_with_encoded_headers(self):

        parser = email.parser.Parser()
        msg = parser.parsestr("""Content-Transfer-Encoding: 7bit
Content-Type: multipart/alternative; boundary="===============2840728917476054151=="
Subject: Great news!
From: =?UTF-8?B?Q2VkcmljIFJJQ0FSRA==?= <*****@*****.**>
To: <*****@*****.**>
Date: Wed, 05 Jun 2013 06:05:56 -0000

This is a multi-part message in MIME format.
--===============2840728917476054151==
Content-Type: text/plain; charset="windows-1252"
Content-Transfer-Encoding: quoted-printable

This is a very simple mailing. I=92m happy.
--===============2840728917476054151==
Content-Type: text/html; charset="windows-1252"
Content-Transfer-Encoding: quoted-printable

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<html><head>
<META http-equiv=3DContent-Type content=3D"text/html; charset=3Diso-8859-1">
</head>
<body>
This is <strong> a very simple</strong> <u>mailing</u>. =
I=92m happy! Nothing else to say...
</body></html>

--===============2840728917476054151==--
""")
        mailing = Mailing.create_from_message(msg, scheduled_start=None, scheduled_duration=None)

        message = parser.parsestr(mailing.header + mailing.body)
        assert(isinstance(message, email.message.Message))
        mail_from = header_to_unicode(message.get("From"))

        self.assertEquals(u"Cedric RICARD <*****@*****.**>", mail_from)
Ejemplo n.º 31
0
 def process(self, peer, mailfrom, rcpttos, data, **params):
     """Pass every attachment of the mail in ``data`` to ``process_attachment``.

     A part counts as an attachment when it is not a multipart container
     and has a filename.  Its decoded payload and filename are forwarded
     together with the extra keyword arguments in ``params``.
     """
     message = email.parser.Parser().parsestr(data)
     attachments = (part for part in message.walk()
                    if not part.is_multipart() and part.get_filename())
     for part in attachments:
         self.process_attachment(part.get_payload(decode=True),
                                 part.get_filename(), **params)
Ejemplo n.º 32
0
def parse_origin_packages(uri, version, target_name):
    """Download and parse the ``Packages`` index of a target.

    The index is read from ``<target url>/packages/Packages``, where the
    target URL comes from ``get_target_url(uri, version, target_name)``.
    Stanzas are separated by blank lines and each is parsed as an
    RFC 822-style header block.

    Returns the list of parsed records.  Runs of blank lines do not create
    empty records, and the last stanza is included even when the index does
    not end with a blank line.
    """
    index_url = get_target_url(uri, version, target_name) + "/packages/Packages"
    response = urlopen(index_url)
    try:
        packages_text = response.read().decode()
    finally:
        # Release the connection as soon as the body has been read.
        response.close()

    parser = email.parser.Parser()
    packages = []
    stanza = []

    def flush():
        # Parse the lines collected so far, if any, and start a new stanza.
        if stanza:
            packages.append(parser.parsestr("".join(stanza)))
            del stanza[:]

    for line in packages_text.splitlines():
        if line == "":
            flush()
        else:
            stanza.append(line + "\n")
    # The final stanza may not be followed by a blank line.
    flush()
    return packages
Ejemplo n.º 33
0
def read_message(path):
    """Read the message file at ``path`` into a ``Message`` tuple.

    The file is decoded as Latin-1.  The resulting ``Message`` is built
    from the ``From`` header, the whitespace-separated ``To`` entries with
    commas stripped from both ends (an empty tuple when there is no ``To``
    header), the parsed ``Date``, the ``Subject`` and the payload.
    """
    with codecs.open(path, 'r', 'Latin-1') as message_file:
        raw = message_file.read()
    message = email.parser.Parser().parsestr(raw)

    to_header = message['To']
    if to_header is None:
        recipients = ()
    else:
        recipients = tuple(m.strip(',') for m in to_header.split())

    sent_at = dateutil.parser.parse(message['Date'])
    return Message(message['From'], recipients, sent_at,
                   message['Subject'], message.get_payload())
Ejemplo n.º 34
0
 def send(self, fromaddr, toaddrs, message):
     """Queue ``message`` for delivery in the current transaction.

     An existing ``Message-Id`` header must be wrapped in angle brackets.
     When there is none, a new id from ``newMessageId`` is generated and a
     header carrying it is prepended to the message.  A data manager
     created for the message is joined to the current transaction.

     Returns the message id without its angle brackets.
     Raises ValueError when an existing Message-Id is not bracketed.
     """
     headers = email.parser.Parser().parsestr(message)
     messageid = headers.get('Message-Id')
     if not messageid:
         messageid = self.newMessageId()
         message = 'Message-Id: <%s>\n%s' % (messageid, message)
     elif messageid.startswith('<') and messageid.endswith('>'):
         messageid = messageid[1:-1]
     else:
         raise ValueError('Malformed Message-Id header')

     manager = self.createDataManager(fromaddr, toaddrs, message)
     transaction.get().join(manager)
     return messageid
Ejemplo n.º 35
0
 def process(self, peer, mailfrom, rcpttos, data, **params):
     """Hand each attachment found in the raw mail ``data`` to ``process_attachment``.

     Multipart parts are only containers and are skipped, as are parts
     without a filename.  For the remaining parts the decoded payload, the
     filename and the extra keyword arguments in ``params`` are forwarded.
     """
     parsed = email.parser.Parser().parsestr(data)
     for part in parsed.walk():
         # Containers never count, whatever filename they may carry.
         filename = None if part.is_multipart() else part.get_filename()
         if filename:
             content = part.get_payload(decode=True)
             self.process_attachment(content, filename, **params)
Ejemplo n.º 36
0
def reademl():
    """Prompt for the path of an ``.eml`` file and print all of its headers.

    Each header is printed as ``name value`` on its own line, framed by a
    blank line before and after. Any failure while opening, reading or
    parsing the file is reported through ``error`` instead of propagating.
    """
    filepath = input("\nEnter file path: ")
    try:
        # The context manager closes the file even when parsing raises.
        with open(filepath, "r") as eml_file:
            msg = email.message_from_file(eml_file)

        header = email.parser.HeaderParser().parsestr(msg.as_string())
        print()

        for h in header.items():
            print(*h)
        print()
    except Exception:
        # Deliberately best-effort, but unlike a bare ``except`` this lets
        # KeyboardInterrupt and SystemExit through.
        error("Bad file | Encoding error")
Ejemplo n.º 37
0
def git_am_patch_split(f, encoding=None):
    """Parse a git-am-style patch and split it up into bits.

    :param f: File-like object to parse
    :param encoding: Encoding to use when creating Git objects; defaults to
        the ``encoding`` attribute of *f*, or ``"ascii"`` if it has none
    :return: Tuple with commit object, diff contents and git version
    """
    encoding = encoding or getattr(f, "encoding", "ascii")
    contents = f.read()
    # isinstance (rather than an exact type comparison) also routes bytes
    # subclasses to the bytes parser. BytesParser is looked up dynamically
    # because it is not available on every Python version.
    if isinstance(contents, bytes) and getattr(email.parser, "BytesParser", None):
        parser = email.parser.BytesParser()
        msg = parser.parsebytes(contents)
    else:
        parser = email.parser.Parser()
        msg = parser.parsestr(contents)
    return parse_patch_message(msg, encoding)
Ejemplo n.º 38
0
def parse_origin_packages():
    """Download the origin ``Packages`` index and parse it into a dict.

    The index is a sequence of RFC-822-style stanzas separated by blank
    lines. Each stanza is parsed with ``email.parser`` and stored under the
    value of its ``Filename`` field.

    Returns:
        dict mapping ``Filename`` to the parsed message for that stanza.
    """
    packages_file = rebuild_path / "Packages"
    get_file(
        f"{origin_url}/{target_dir}/packages/Packages",
        packages_file,
    )
    parser = email.parser.Parser()
    packages = {}
    stanza = []

    def flush():
        # Empty stanzas (e.g. consecutive blank lines) are ignored so they
        # do not create an entry keyed by None.
        if stanza:
            package = parser.parsestr("".join(stanza))
            packages[package["Filename"]] = package
            del stanza[:]

    for line in packages_file.read_text().splitlines():
        if line == "":
            flush()
        else:
            stanza.append(line + "\n")
    # Keep the final stanza even when the file lacks a trailing blank line.
    flush()
    return packages
Ejemplo n.º 39
0
    def test_dkim(self):
        """A customized message for a DKIM-enabled mailing is signed and verifies."""
        dkim_settings = {
            'selector': 'mail',
            'domain': 'unittest.cloud-mailing.net',
            'privkey': self._get_dkim_privkey(),
        }
        mailing = factories.MailingFactory(dkim=dkim_settings)
        recipient = factories.RecipientFactory(mailing=mailing)

        raw_message = self._customize(recipient)

        # The generated message must not contain CRLF line endings.
        self.assertNotIn(b"\r\n", raw_message)

        parsed = email.parser.Parser().parsestr(raw_message, headersonly=False)
        assert isinstance(parsed, email.message.Message)
        self.assertTrue('DKIM-Signature' in parsed)

        verified = dkim.verify(raw_message, dnsfunc=self._get_txt)
        self.assertTrue(verified)
Ejemplo n.º 40
0
def git_am_patch_split(f, encoding=None):
    """Split a git-am-style patch into its components.

    :param f: File-like object holding the patch
    :param encoding: Encoding used when creating Git objects; falls back to
        the ``encoding`` attribute of *f*, then to ``"ascii"``
    :return: Tuple with commit object, diff contents and git version
    """
    if not encoding:
        encoding = getattr(f, "encoding", "ascii")
    raw = f.read()
    # BytesParser is looked up dynamically; when it is missing, or the data
    # is text, the str-based parser is used instead.
    bytes_parser_cls = getattr(email.parser, "BytesParser", None)
    if isinstance(raw, bytes) and bytes_parser_cls:
        message = bytes_parser_cls().parsebytes(raw)
    else:
        message = email.parser.Parser().parsestr(raw)
    return parse_patch_message(message, encoding)
Ejemplo n.º 41
0
 def extractHeaderTuples(self, uid):
     """Fetch the From/Subject headers of message *uid* as decoded tuples.

     Returns a list of ``(header name, value)`` pairs. Each value is the
     space-joined sequence of its decoded fragments; fragments that report a
     charset are decoded with it and re-encoded as UTF-8.
     """
     self.logger.info('Getting header information for {0:d}'.format(uid))
     rc, data = self.M.uid('FETCH', uid, '(BODY.PEEK[HEADER.FIELDS (FROM SUBJECT)] UID)')
     parsed_headers = email.parser.HeaderParser().parsestr(data[0][1])
     filterValues = []
     for name, raw_value in parsed_headers.items():
         fragments = [
             chunk if charset is None else chunk.decode(charset).encode('utf-8')
             for chunk, charset in email.header.decode_header(raw_value)
         ]
         filterValues.append((name, ' '.join(fragments)))
     self.logger.debug('Extracted headers for uid {0:d} - {1}'.format(uid, filterValues))
     return filterValues
Ejemplo n.º 42
0
    def metadata(self):
        """Return the parsed METADATA of this wheel, reading it on first use.

        The result is cached in ``self._metadata``; later calls return the
        cached object without reopening the archive.
        """
        if self._metadata is not None:
            return self._metadata

        # METADATA lives in the wheel's dist-info directory. It is parsed
        # with email.parser because, per PEP-0314, the PKG-INFO format is a
        # single set of RFC-822 headers, and the rfc822 module that used to
        # parse those has been deprecated in favor of the email package.
        with zipfile.ZipFile(self.path(), 'r') as whl:
            member = self._dist_info() + '/METADATA'
            with whl.open(member) as f:
                raw = f.read()

        # Bytes that are not ASCII are dropped before parsing.
        text = raw.decode('ascii', 'ignore')
        self._metadata = email.parser.Parser().parsestr(text)
        return self._metadata
Ejemplo n.º 43
0
def email_extractor(path):
    """Yield sender and recipient addresses from Enron outbox folders.

    Walks *path* and only reads files in directories whose path ends in
    ``sent_items``, ``sent`` or ``sent_mail``; each such directory is
    printed. For every file the ``From`` header is yielded first (``None``
    when absent), followed by each comma/whitespace-separated token of the
    ``To`` header.
    """
    parser = email.parser.Parser()
    outbox_patterns = [re.compile(suffix) for suffix in ["sent_items$", "sent$", "sent_mail$"]]
    for root, _, files in os.walk(path):
        is_outbox = any(pattern.search(root) for pattern in outbox_patterns)
        if not is_outbox:
            continue
        print(root)
        for file_name in files:
            full_path = os.path.join(root, file_name)
            with codecs.open(full_path, "r", "Latin-1") as handle:
                message = parser.parsestr(handle.read())
                yield message["From"]
                recipients = message["To"]
                if recipients is None:
                    continue
                for address in recipients.replace(",", " ").split():
                    yield address
Ejemplo n.º 44
0
def email_extractor(path):
    """Generate the e-mail addresses found in Enron "sent" folders.

    Only directories under *path* whose path ends in ``sent_items``,
    ``sent`` or ``sent_mail`` are read, and each one is printed. Per file,
    the ``From`` header comes first (``None`` if missing), then every token
    of the ``To`` header after commas are treated as whitespace.
    """
    parser = email.parser.Parser()
    outbox_res = [re.compile(ending) for ending in ['sent_items$', 'sent$', 'sent_mail$']]

    def in_outbox(directory):
        # True when any of the outbox suffix patterns matches the directory.
        return any(regex.search(directory) for regex in outbox_res)

    for root, _, files in os.walk(path):
        if not in_outbox(root):
            continue
        print(root)
        for entry in files:
            with codecs.open(os.path.join(root, entry), 'r', 'Latin-1') as stream:
                message = parser.parsestr(stream.read())
                yield message['From']
                to_header = message['To']
                if to_header is not None:
                    for recipient in to_header.replace(',', ' ').split():
                        yield recipient
Ejemplo n.º 45
0
def map(event):
    """Count (sender, recipient) pairs for one mapper job and upload them.

    The job description is the JSON ``Message`` of the first SNS record in
    *event*. The CSV it points at is downloaded, each row's ``message``
    field is parsed as e-mail headers, and every ``(From, To-address)``
    pair is tallied. When at least one pair was seen, the counts are
    written to the bucket named by the ``REDUCE_RESULTS_BUCKET`` environment
    variable; otherwise nothing is written.

    Raises:
        KeyError: if the job description or the environment lacks a
            required key.
    """
    message = json.loads(event['Records'][0]['Sns']['Message'])

    total_jobs = message['total_jobs']
    run_id = message['run_id']
    job_id = message['job_id']

    # The results bucket comes from the environment only; the hard-coded
    # name that used to precede this line was overwritten immediately.
    bucket = os.environ['REDUCE_RESULTS_BUCKET']

    tmp_file = download_from_s3(message['bucket'], message['key'])

    parser = email.parser.Parser()
    counts = {}

    for line in _csv_lines_from_filepath(tmp_file):
        eml = parser.parsestr(line['message'], headersonly=True)
        recipients = eml.get('To')

        # Messages without recipients contribute nothing.
        if not recipients:
            continue

        sender = eml['From']
        for recipient in recipients.split(','):
            from_to = (sender, recipient.strip())
            counts[from_to] = counts.get(from_to, 0) + 1

    if not counts:
        return

    metadata = {
        'job_id': str(job_id),
        'run_id': str(run_id),
        'total_jobs': str(total_jobs),
    }

    key = 'run-%s/mapper-%s-done.csv' % (run_id, job_id)
    write_csv_to_s3(bucket, key, counts, Metadata=metadata)
Ejemplo n.º 46
0
 def headers(self):
     """Return the parsed response headers as a dict, parsing on first use.

     ``self.raw_headers`` (bytes) is parsed once and cached in
     ``self._headers``. The Python 2 ``HTTPMessage`` API is tried first;
     when its ``seekable`` argument is rejected, the headers are decoded as
     ISO-8859-1 and parsed with ``email.parser`` instead.

     Raises:
         TypeError: re-raised when it is not the ``seekable`` rejection.
     """
     if self._headers is None:
         logger.debug("raw headers: " + repr(self.raw_headers))
         headers_buffer = BytesIO(self.raw_headers)
         try:
             # py 2
             # seekable has to be 0, otherwise it won't parse anything
             m = httplib.HTTPMessage(headers_buffer, seekable=0)
             m.readheaders()
             self._headers = m.dict
         except TypeError as ex:
             # py 3
             # Match on the stable tail of the message only: newer
             # interpreters prefix it with the qualified function name, so
             # an exact comparison would wrongly re-raise there.
             if "unexpected keyword argument 'seekable'" not in str(ex):
                 raise
             parser = email.parser.Parser()
             m = parser.parsestr(self.raw_headers.decode('iso-8859-1'))
             self._headers = dict(m.items())
     return self._headers
Ejemplo n.º 47
0
    def test_dkim_and_feedback_loop(self):
        """With DKIM and a feedback loop configured, both signatures verify."""
        privkey = self._get_dkim_privkey()
        dkim_settings = {
            'selector': 'mail',
            'domain': 'unittest.cloud-mailing.net',
            'privkey': privkey,
        }
        feedback_settings = {
            'dkim': {
                'selector': 'mail',
                'domain': 'unittest.cloud-mailing.net',
                'privkey': privkey,
            },
            'sender_id': 'CloudMailing',
        }
        mailing = factories.MailingFactory(dkim=dkim_settings,
                                           feedback_loop=feedback_settings)
        recipient = factories.RecipientFactory(mailing=mailing)

        raw_message = self._customize(recipient)

        # The generated message must not contain CRLF line endings.
        self.assertNotIn(b"\r\n", raw_message)

        parsed = email.parser.Parser().parsestr(raw_message, headersonly=False)
        assert isinstance(parsed, email.message.Message)
        self.assertTrue('Feedback-ID' in parsed)
        signatures = parsed.get_all('DKIM-Signature')
        self.assertEqual(2, len(signatures))

        # Both signatures (index 0 and 1) have to verify.
        verifier = dkim.DKIM(raw_message)
        for index in (0, 1):
            self.assertTrue(verifier.verify(index, dnsfunc=self._get_txt))
Ejemplo n.º 48
0
    def test_feedback_loop(self):
        """A feedback-loop mailing gets a Feedback-ID header and a valid DKIM signature."""
        feedback_settings = {
            'dkim': {
                'selector': 'mail',
                'domain': 'unittest.cloud-mailing.net',
                'privkey': self._get_dkim_privkey(),
            },
            'sender_id': 'CloudMailing',
        }
        mailing = factories.MailingFactory(feedback_loop=feedback_settings,
                                           domain_name='cloud-mailing.net')
        recipient = factories.RecipientFactory(mailing=mailing)

        raw_message = self._customize(recipient)

        # The generated message must not contain CRLF line endings.
        self.assertNotIn(b"\r\n", raw_message)

        parsed = email.parser.Parser().parsestr(raw_message, headersonly=False)
        assert isinstance(parsed, email.message.Message)
        self.assertTrue('Feedback-ID' in parsed)
        self.assertTrue('DKIM-Signature' in parsed)

        # Feedback-ID is "<mailing id>:<domain>:<mailing type>:<sender id>".
        expected_feedback_id = '%d:cloud-mailing.net:%s:CloudMailing' % (mailing.id, mailing.type)
        self.assertEqual(expected_feedback_id, parsed['Feedback-ID'])

        verified = dkim.verify(raw_message, dnsfunc=self._get_txt)
        self.assertTrue(verified)
Ejemplo n.º 49
0
    def __init__(self,string,id=None):
        """
        Initializes with an e-mail string and, optionally, an identifier.

        The string is parsed with the module-level ``parser``, which is
        shared so that it is not created anew for each message. When no
        *id* is given, the hex form of a fresh ``uuid.uuid1()`` is used.
        """
        self.string = string
        # Parsing errors (UnicodeEncodeError included) propagate unchanged.
        self.parsed = parser.parsestr(string)

        # Creating a uuid a little early.
        # NOTE(review): an earlier remark here claimed every generated UUID
        # came out identical - confirm whether that is still reproducible.
        self.uuid = uuid.uuid1().hex if id is None else id
Ejemplo n.º 50
0
 def extractHeaderTuples(self, uid):
     """Return the decoded From/Subject headers of message *uid*.

     The result is a list of ``(header name, value)`` tuples in the order
     the server returned them. A value is built by joining its decoded
     fragments with spaces; a fragment with a charset is decoded using that
     charset and re-encoded as UTF-8, others are used as they are.
     """
     self.logger.info('Getting header information for {0:d}'.format(uid))
     rc, data = self.M.uid('FETCH', uid,
                           '(BODY.PEEK[HEADER.FIELDS (FROM SUBJECT)] UID)')
     header_block = email.parser.HeaderParser().parsestr(data[0][1])

     def decode_value(raw_value):
         # Join the decoded fragments of one header value.
         parts = []
         for fragment, charset in email.header.decode_header(raw_value):
             if charset is not None:
                 fragment = fragment.decode(charset).encode('utf-8')
             parts.append(fragment)
         return ' '.join(parts)

     filterValues = [(name, decode_value(value))
                     for name, value in header_block.items()]
     self.logger.debug('Extracted headers for uid {0:d} - {1}'.format(
         uid, filterValues))
     return filterValues
Ejemplo n.º 51
0
    def searchMessageSubjects(self, term=None):
        """Search the mailbox for subjects matching *term*.

        Every hit is printed as ``#<id> [<from>] -> [<subject>]``.

        Returns:
            list of the matching message ids, without duplicates, in the
            order the server returned them; empty when there is no
            connection or no search term.
        """
        if not self.srv or not term:
            return []

        matched = []
        self.srv.select(readonly=True)
        search_term = self.buildSearchTerm("Subject", term)
        typ, data = self.srv.search(None, search_term)
        parser = email.parser.HeaderParser()
        for uid in data[0].split():
            header = self.srv.fetch(uid, '(BODY[HEADER])')
            if header:
                msg = parser.parsestr(header[1][0][1])
                # A single parenthesised argument prints identically as a
                # Python 2 statement and as a Python 3 function call.
                print("#%s [%s] -> [%s]" % (uid, msg['from'], msg['subject']))

                if uid not in matched:
                    matched.append(uid)
        return matched
Ejemplo n.º 52
0
    def getXsubjects(self, num=10):
        """Print sender and subject of at most *num* messages.

        Messages are taken in the order given by
        ``getMessagesReverseOrder`` and printed as
        ``#<position> [<from>] -> [<subject>]``. Nothing is printed when
        there is no connection or *num* is not positive. Always returns
        ``None``.
        """
        if not self.srv:
            return

        # select() reports the message count as text; convert it so the
        # comparison with *num* is numeric. If it cannot be converted, fall
        # back to the requested limit.
        try:
            numMessages = int(self.srv.select(readonly=True)[1][0])
        except (TypeError, ValueError):
            numMessages = num
        typ, data = self.getMessagesReverseOrder()
        maxNum = max(min(num, numMessages), 0)

        parser = email.parser.HeaderParser()
        for i, msg_id in enumerate(data[0].split()[:maxNum], 1):
            header = self.srv.fetch(msg_id, '(BODY[HEADER])')
            if header:
                msg = parser.parsestr(header[1][0][1])
                # A single parenthesised argument prints identically as a
                # Python 2 statement and as a Python 3 function call.
                print("#%i [%s] -> [%s]" % (i, msg['from'], msg['subject']))
        return None
Ejemplo n.º 53
0
    def getXsubjects(self, num=10):
        """Print sender and subject of at most *num* messages.

        Messages are taken in the order given by
        ``getMessagesReverseOrder`` and printed as
        ``#<position> [<from>] -> [<subject>]``. Nothing is printed when
        there is no connection or *num* is not positive. Always returns
        ``None``.
        """
        if not self.srv:
            return

        # select() reports the message count as text; convert it so the
        # comparison with *num* is numeric. If it cannot be converted, fall
        # back to the requested limit.
        try:
            numMessages = int(self.srv.select(readonly=True)[1][0])
        except (TypeError, ValueError):
            numMessages = num
        typ, data = self.getMessagesReverseOrder()
        maxNum = max(min(num, numMessages), 0)

        parser = email.parser.HeaderParser()
        for i, msg_id in enumerate(data[0].split()[:maxNum], 1):
            header = self.srv.fetch(msg_id, '(BODY[HEADER])')
            if header:
                msg = parser.parsestr(header[1][0][1])
                # A single parenthesised argument prints identically as a
                # Python 2 statement and as a Python 3 function call.
                print("#%i [%s] -> [%s]" % (i, msg['from'], msg['subject']))
        return None
Ejemplo n.º 54
0
 def dosync(self):
     """Announce every unseen message to the notifier's channels.

     Unseen messages are looked up with ``sort`` and, if that raises, with
     ``search``. For each one the configured headers are formatted with
     ``format_header`` and sent to every entry of ``notifier.noticed``
     (only the part before the first space is used as the target). From the
     fifth notice onwards each notice is followed by a two second pause. A
     status other than ``"OK"`` is printed.
     """
     try:
         a, b = self.M.sort("DATE", "UTF-8", "UNSEEN")
     except Exception:
         # Presumably for servers without SORT support. Unlike a bare
         # ``except`` this lets KeyboardInterrupt/SystemExit propagate.
         a, b = self.M.search("UTF-8", "UNSEEN")
     if a == "OK" and len(b) > 0 and len(b[0]) > 0:
         flood_excess = 0
         parser = email.parser.HeaderParser()
         for msg_id in b[0].split():
             data = self.M.fetch(msg_id, "(RFC822)")
             # The message text is taken from the second response item when
             # the first one starts with the message id, else from the first.
             if data[1][0][0 : len(msg_id)] == msg_id:
                 header_data = data[1][1][1]
             else:
                 header_data = data[1][0][1]
             msg = parser.parsestr(header_data)
             text = "".join([self.format_header(header, msg) for header in self.notifier.headers])
             for chan in self.notifier.noticed:
                 self.notifier.notice(chan.split(" ", 1)[0], text)
                 flood_excess += 1
                 if flood_excess >= 5:
                     time.sleep(2)
     elif a != "OK":
         print(a)
Ejemplo n.º 55
0
 def asMIMEText(self, num):
     """Fetch message *num* in RFC822 form and return it parsed as a message object."""
     typ, data = self.fetch(num, '(RFC822)')
     raw_message = data[0][1]
     return email.parser.Parser().parsestr(raw_message)
Ejemplo n.º 56
0
 async def parse(self):
     """Read one HTTP request head from ``self.reader`` and record it.

     Sets ``requestline``, ``method``, ``path``, ``headers``, ``keep_alive``,
     ``port`` (and ``hostname`` when the Host header contains a colon),
     ``_accept`` and ``_accept_encoding`` on ``self``, and fills ``self.env``
     with CGI-style variables.

     Returns:
         True once a request head was parsed; None when the request line
         is empty or blank.

     Raises:
         errors.HTTPError: 505 for HTTP version 2.0 or above, 400 when a
             header line is too long.
         AssertionError: when the request line or version is malformed.
     """
     # Each read is bounded by the keep-alive timeout.
     requestline = await asyncio.wait_for(self.reader.readline(), self.keep_alive_timeout)
     if not requestline:
         return
     self.requestline = requestline.strip().decode()
     if not self.requestline:
         return
     # The request line must be exactly "<method> <path> HTTP/<major>.<minor>".
     words = self.requestline.split(' ')
     assert len(words) == 3, 'Bad request syntax (%r)' % self.requestline
     self.method, self.path, version = words
     assert version.startswith('HTTP/'), 'Bad request version (%r)' % version
     version_number = version[5:].split('.')
     assert len(version_number) == 2, 'Bad request version (%r)' % version
     protocol_version = tuple(map(int, version_number))
     if protocol_version >= (2, 0):
         raise errors.HTTPError(505, "Invalid HTTP Version (%s)" % version)
     # HTTP/1.1 and later keep the connection alive unless told otherwise below.
     if protocol_version >= (1, 1):
         self.keep_alive = True
     # Never answer with a newer protocol version than the request used.
     if protocol_version < self.protocol_version:
         self.protocol_version = protocol_version
     # Examine the headers and look for a Connection directive.
     header_lines = []
     while True:
         line = await asyncio.wait_for(self.reader.readline(), self.keep_alive_timeout)
         # A blank (or empty) line ends the header section.
         if not line.strip():
             break
         header_lines.append(line.decode())
     try:
         parser = email.parser.Parser(_class=http.client.HTTPMessage)
         self.headers = parser.parsestr(''.join(header_lines))
     except http.client.LineTooLong:
         raise errors.HTTPError(400, "Line too long")
     conntype = self.headers.get('Connection', "")
     if conntype.lower() == 'close':
         self.keep_alive = False
     elif conntype.lower() == 'keep-alive' and protocol_version >= (1, 1):
         self.keep_alive = True
     self.env['SERVER_PROTOCOL'] = 'HTTP/%d.%d' % protocol_version
     self.env['REQUEST_METHOD'] = self.method
     self.env['CONTENT_TYPE'] = self.headers.get('content-type')
     self.env['CONTENT_LENGTH'] = self.headers.get('content-length')
     # Expose the remaining headers as HTTP_<NAME>. Headers whose normalised
     # name is already an env key (e.g. CONTENT_TYPE) are skipped, and
     # repeated headers are joined with commas.
     for key, value in self.headers.items():
         key = key.replace('-', '_').upper()
         if key in self.env:
             continue
         key = 'HTTP_' + key
         value = value.strip()
         oldvalue = self.env.get(key)
         if oldvalue is None:
             self.env[key] = value
         else:
             self.env[key] = oldvalue + ',' + value
     self.env['REQUEST_URI'] = self.path
     host = self.env.get('HTTP_HOST')
     self.port = None
     if host:
         # Split at the last colon; `_` is the separator and is empty when
         # the Host value contains no colon, in which case neither hostname
         # nor port is set here.
         hostname, _, port = host.rpartition(':')
         if _:
             self.hostname = hostname
             self.port = int(port)
     self._accept = self.init_q(self.headers.get('accept'))
     self._accept_encoding = self.init_q(self.headers.get('accept-encoding'))
     return True
Ejemplo n.º 57
0
 def mark(self, contents):
     """Parse *contents* as a message, apply ``date`` and ``user_agent`` to it, and return it as a string."""
     message = email.parser.Parser().parsestr(contents)
     # The two steps run in this fixed order on the same message object.
     for stamp in (self.date, self.user_agent):
         stamp(message)
     return message.as_string()
Ejemplo n.º 58
0
args = parser.parse_args()

with open(args.template) as fid:
    template = fid.read()

sender = {"mail": "Bartosz Telenczuk <*****@*****.**>", "firstname": "Bartosz"}

students = csv.DictReader(open(args.emails_csv))

parser = email.parser.Parser()
messages = []
email_addresses = []
for student in students:
    email_txt = template.format(firstname=student["firstname"])
    msg = parser.parsestr(email_txt)
    msg["From"] = sender["mail"]
    msg["To"] = student["mail"]
    print(msg)
    msg.set_charset("utf-8")
    messages.append((sender["mail"], student["mail"], msg))
    email_addresses.append(student["mail"])

r = "No"
if args.send == "yes":
    print("\n" + "\n".join(email_addresses))
    r = input('Do you really want to send messages to these recipients (if yes type "Yes")? ')
    r = r.lower()

if r == "yes":
    import smtplib