def test_from_regex(self):
    """Check that the strict From_ regex accepts all supported date forms.

    Writes four messages to ``self._path`` whose "From " separator lines
    end with a positive numeric zone, a negative numeric zone, or no zone
    at all, then verifies that ``mailbox.UnixMailbox`` yields exactly four
    messages.
    """
    # Testing new regex from bug #1633678
    #
    # Every separator must start at the beginning of a line and the sender
    # must be a single whitespace-free token, otherwise the strict regex
    # does not recognise the line as a message boundary.
    mbox_text = (
        "From fred@example.com Mon May 31 13:24:50 2004 +0200\n"
        "Subject: message 1\n"
        "\n"
        "body1\n"
        "From fred@example.com Mon May 31 13:24:50 2004 -0200\n"
        "Subject: message 2\n"
        "\n"
        "body2\n"
        "From fred@example.com Mon May 31 13:24:50 2004\n"
        "Subject: message 3\n"
        "\n"
        "body3\n"
        "From fred@example.com Mon May 31 13:24:50 2004\n"
        "Subject: message 4\n"
        "\n"
        "body4\n"
    )
    with open(self._path, 'w') as f:
        f.write(mbox_text)

    # Keep the reader open only while the mailbox is being iterated so the
    # handle is released even when the assertion fails.
    with open(self._path, 'r') as fp:
        box = mailbox.UnixMailbox(fp)
        self.assertEqual(len(list(box)), 4)
def main():
    """Thread the messages found in the file ``mbox`` and print the result.

    Each message read from the mailbox is converted with ``make_message``,
    the resulting list is handed to ``thread``, and the containers of the
    returned subject table are printed in subject order with
    ``print_container``.
    """
    import mailbox

    print('Reading input file...')
    # The context manager closes the input even if parsing raises.
    with open("mbox", 'rb') as f:
        mbox = mailbox.UnixMailbox(f)
        msglist = []
        while 1:
            msg = mbox.next()
            if msg is None:
                break
            msglist.append(make_message(msg))

    print('Threading...')
    subject_table = thread(msglist)

    # Output
    # sorted() works whether items() returns a list or a view; calling
    # .sort() on the result of items() fails when it is a view.
    for subj, container in sorted(subject_table.items()):
        print_container(container)
def _selectBox(self):
    """Open the mailbox selected in the UI and store the reader in ``self.mb``.

    The value of ``self.boxtype`` is compared with the entries of
    ``self.boxtyps`` by position, in order, and the first match decides
    which ``mailbox`` reader is created for the path held in
    ``self.mailbox``.  A status line is reported through ``self.Disp``.
    If nothing matches, an error line is shown and ``self.running`` is set
    to 2.
    """
    # ``mailbox`` reader names for the formats that are opened from a file
    # object, keyed by their position in ``self.boxtyps``.  Position 2
    # (MailDir) is opened from a directory name and is handled separately.
    file_readers = {
        0: 'UnixMailbox',          # mBox Strict
        1: 'PortableUnixMailbox',  # mBox Loose
        3: 'MmdfMailbox',          # MMDF
        4: 'MHMailbox',            # MH
        5: 'BabylMailbox',         # Babyl
    }

    for position in range(6):
        if self.boxtype.get() != self.boxtyps[position]:
            continue

        if position == 2:
            # MailDir
            self.mb = mailbox.Maildir(os.path.dirname(self.mailbox.get()))
            self.Disp(self.boxtype.get(), " at location ",
                      os.path.dirname(self.mailbox.get()),
                      " Opened Successfully.")
        else:
            reader = getattr(mailbox, file_readers[position])
            self.mb = reader(file(self.mailbox.get(), 'r'))
            self.Disp(self.boxtype.get(), " at location ",
                      self.mailbox.get(), " Opened Successfully.")
        return

    # Unknown File Type
    self.Disp("*** I don't know about that file type.")
    # NOTE(review): running = 2 is applied on this failure path only;
    # confirm against the code that reads self.running.
    self.running = 2
def load_from_file():
    """Import the messages of the configured mbox file in batches.

    Reads its settings from ``tornado.options.options``:

    * ``init``       -- when true, the index is deleted before it is created
    * ``skip``       -- number of leading messages to ignore
    * ``infile``     -- path of the mbox file to read
    * ``batch_size`` -- number of converted messages per ``upload_batch`` call

    Every message after the skipped ones is passed to
    ``convert_msg_to_json``; truthy results are collected and uploaded
    whenever a full batch is available, and once more at the end for the
    remainder.
    """
    options = tornado.options.options

    if options.init:
        delete_index()
    create_index()

    if options.skip:
        logging.info("Skipping first %d messages from mbox file", options.skip)

    count = 0
    upload_data = list()
    logging.info("Starting import from file %s", options.infile)

    # The context manager releases the input file even if a conversion or
    # an upload raises part-way through.
    with open(options.infile, 'rb') as infile:
        mbox = mailbox.UnixMailbox(infile, email.message_from_file)

        # NOTE(review): this parser is built but not referenced again in
        # this function.
        emailParser = DelegatingEmailParser(
            [AmazonEmailParser(), SteamEmailParser()])

        for msg in mbox:
            count += 1
            # count is 1-based here, so "<=" is what skips exactly the
            # first `skip` messages announced in the log line above.
            if count <= options.skip:
                continue
            item = convert_msg_to_json(msg)
            if item:
                upload_data.append(item)
                if len(upload_data) == options.batch_size:
                    upload_batch(upload_data)
                    upload_data = list()

    # upload remaining items in `upload_batch`
    if upload_data:
        upload_batch(upload_data)

    logging.info("Import done - total count %d", count)
def setupUpdatesMethod(self, numUpdates):
    """Prepare the data structure used by the updates test.

    Reads messages from the start of the primary mailbox (the module-level
    ``mbox`` path) and wraps each one in ``testMessage``.  Messages that
    carry a ``message-id`` header are stored under that id; reading stops
    once ``numUpdates`` such messages were stored or the mailbox ends.

    Returns a dictionary mapping message-id to the wrapped message.
    """
    updates = {}
    stored = 0

    # The context manager closes the mailbox file once reading is done.
    with open(mbox, "r") as fp:
        mb = mailbox.UnixMailbox(fp)
        msg = mb.next()
        # Compare against None explicitly: the end of the mailbox is
        # signalled by None, while a message without headers may be falsy
        # and must not end the loop early.
        while msg is not None and stored < numUpdates:
            obj = testMessage(msg)
            mid = msg.dict.get("message-id", None)
            if mid:
                updates[mid] = obj
                stored = stored + 1
            msg = mb.next()

    return updates
#!/usr/bin/env python import mailbox # read various mailbox formats (incl. Unix format) import smtplib # tools for sending mail import string # open a part of my outgoing mailbox, for resending: mbox = mailbox.UnixMailbox(open("testbox", "r")) while 1: msg = mbox.next() # grab next message if not msg: break # can extract mail header fields like a dictionary, # e.g. msg['To'], msg['From'], msg['cc'], msg['date'] etc. # msg.keys() lists all keys for this message # msg.fp.read() gets the body of the message # str(msg) is the header of the message # send message to msg['To'], those on the cc list, and myself: to = [msg['To']] if msg.has_key('cc'): to += map(string.strip, msg['cc'].split(',')) to.append(msg['From']) # add myself message = str(msg) + """ Due to an error with my email connection, the email I sent you on %s may not have reached you. A copy of the message is inserted below. I apologize if you end up with multiple copies of this message. ===============================================================================
json_part = {} if part.get_content_maintype() == 'multipart': continue json_part['contentType'] = part.get_content_type() content = part.get_payload(decode=False).decode('utf-8', 'ignore') json_part['content'] = cleanContent(content) json_msg['parts'].append(json_part) # Finally, convert date from asctime to milliseconds since epoch using the # $date descriptor so it imports "natively" as an ISODate object in MongoDB then = parse(json_msg['Date']) millis = int( time.mktime(then.timetuple()) * 1000 + then.microsecond / 1000) json_msg['Date'] = {'$date': millis} return json_msg mbox = mailbox.UnixMailbox(open(MBOX, 'rb'), email.message_from_file) # Write each message out as a JSON object on a separate line # for easy import into MongoDB via mongoimport f = open(OUT_FILE, 'w') for msg in gen_json_msgs(mbox): if msg != None: f.write(json.dumps(msg, cls=Encoder) + '\n') f.close()
#!/usr/bin/python
# This program removes duplicated messages from a mailbox.
#
# Messages are copied from the input mailbox to the output file in order.
# A message is dropped when it has a Message-ID and that id equals the id
# of the message written immediately before it.

import mailbox

fp = open('/var/spool/mail/gregb')
outfp = open('/tmp/mailbox-gregb', 'w')
try:
    mb = mailbox.UnixMailbox(fp)
    prev = None  # Message-ID of the most recently written message

    msg = mb.next()
    while msg is not None:
        try:
            msgid = msg['Message-ID']
        except KeyError:
            # No Message-ID header: never treated as a duplicate.
            msgid = None

        if msgid is not None and prev == msgid:
            print("Skipping duplicate message %s" % msgid)
        else:
            # Rewind to the start of this message and copy it verbatim.
            msg.fp.seek(msg.startofheaders)
            outfp.write(msg.fp.read())
            print("Wrote %s" % msgid)
            # Remember what was just written; without this update the
            # duplicate check above can never succeed.
            prev = msgid

        msg = mb.next()
finally:
    outfp.close()
    fp.close()
if __name__ == '__main__': import networkx as nx try: import matplotlib.pyplot as plt except: pass if len(sys.argv)==1: file="unix_email.mbox" else: file=sys.argv[1] fp=open(file,"r") mbox = mailbox.UnixMailbox(fp, msgfactory) # parse unix mailbox G=nx.MultiDiGraph() # create empty graph # parse each messages and build graph for msg in mbox: # msg is python email.Message.Message object (source_name,source_addr) = parseaddr(msg['From']) # sender # get all recipients # see http://www.python.org/doc/current/lib/module-email.Utils.html tos = msg.get_all('to', []) ccs = msg.get_all('cc', []) resent_tos = msg.get_all('resent-to', []) resent_ccs = msg.get_all('resent-cc', []) all_recipients = getaddresses(tos + ccs + resent_tos + resent_ccs) # now add the edges for this mail message for (target_name,target_addr) in all_recipients:
def inc():
    """Run the incremental mail-loading benchmark from the command line.

    Command-line arguments read here:

    * ``sys.argv[2]`` -- path of the mailbox file
    * ``sys.argv[3]``, ``sys.argv[4]`` -- lower and upper message index
    * ``sys.argv[5]`` -- optional number of threads (1 when absent/invalid)
    * ``sys.argv[6]`` -- optional wait value (0.25 when absent/invalid);
      it is doubled before use, and is 0 when no thread count was given

    The message range is divided evenly between the threads.  For every
    thread a destination is initialised with ``initmaili``, the mailbox is
    opened and advanced to the thread's first message, and ``loadinc`` is
    started through ``do`` in a new thread.  After all threads reported
    back, the elapsed wall-clock time, CPU time, database growth and
    memory growth are printed and the database is closed.
    """
    import Zope2, thread

    first, last = atoi(sys.argv[3]), atoi(sys.argv[4])
    count = last - first

    try:
        threads = atoi(sys.argv[5])
    except:
        threads = 1
        wait = 0
    else:
        try:
            wait = atof(sys.argv[6])
        except:
            wait = 0.25
        wait = wait * 2

    count = count / threads
    last = first + count
    omin = first

    db = Zope2.DB
    size_before = db.getSize()
    mem_before = VmSize()
    wall_start = time.time()
    cpu_start = time.clock()

    mbox = sys.argv[2]
    jobs = []
    for i in range(threads):
        amin = first + i * count
        dest = 'maili%s' % amin
        initmaili(dest)

        f = open(mbox)
        mb = mailbox.UnixMailbox(f)
        # Advance this thread's mailbox to its first message.
        for _skipped in range(amin):
            mb.next()

        # The lock is held until the worker calls returnf, which lets the
        # main thread wait for completion further down.
        lock = thread.allocate_lock()
        lock.acquire()

        def returnf(t, c, size, mem, r, lock=lock):
            print("%s %s" % (c, r))
            lock.release()

        jobs.append((lock, (dest, mb, f, count, wait), returnf))

    for _lock, job_args, on_done in jobs:
        thread.start_new_thread(do, (Zope2.DB, loadinc, job_args, on_done))

    # Block until every worker has released its lock.
    for done_lock, _job_args, _on_done in jobs:
        done_lock.acquire()

    wall = time.time() - wall_start
    cpu = time.clock() - cpu_start
    size_delta = db.getSize() - size_before
    mem_delta = VmSize() - mem_before
    print("%s %s %s %s" % (wall, cpu, size_delta, mem_delta))

    #hist("%s-%s-%s" % (omin, count, threads))

    Zope2.DB.close()
latest = previous last_processed = None # process updated mbox files for file in glob(archive): if int(previous) >= int(os.stat(file).st_mtime): continue # open gzipped/raw file if file.endswith('.gz'): fh = gzip.open(file) else: fh = open(file) # process each multipart message in the mailbox for msg in iter(mailbox.UnixMailbox(fh, email.message_from_file)): last_processed = msg['Date'] if msg.is_multipart(): detach(msg) elif '919-573-9199' in msg.get_payload(): if '-----BEGIN PGP SIGNATURE-----' in msg.get_payload().split( "\n"): msg.add_header('Content-Disposition', 'attachment', filename='pgp.txt') wrapper = email.message.Message() wrapper.attach(msg) for header in msg.keys(): wrapper[header] = msg[header] detach(wrapper)
# Print every header and the body size of each message in a Unix mailbox.

import mailbox

# The context manager closes the spool file when the loop is finished or
# fails part-way through.
with open("/var/spool/mail/effbot") as spool:
    mb = mailbox.UnixMailbox(spool)
    # Iterating the mailbox stops only at the real end of the file; a
    # truthiness test on the message could stop early on a message that
    # has no headers.
    for msg in mb:
        for k, v in msg.items():
            print(k, "=", v)
        body = msg.fp.read()
        print(len(body), "bytes in body")

## subject = for he's a ...
## message-id = <*****@*****.**>
## received = (from [email protected])
##  by spam.egg (8.8.7/8.8.5) id CAA03202
##  for effbot; Fri, 15 Oct 1999 02:27:36 +0200
## from = Fredrik Lundh <*****@*****.**>
## date = Fri, 15 Oct 1999 12:35:36 +0200
## to = [email protected]
## 1295 bytes in body
'''
The mailbox module

The mailbox module is used to work with a variety of mailbox formats.
Most mailbox formats store plain RFC 822 messages in text files and use
separator lines to tell the individual messages apart.
'''

import mailbox

help(mailbox)

# NOTE(review): the path is an empty string, which open() cannot open;
# supply the path of a real mailbox file before running this line.
spool = open('')
mb = mailbox.UnixMailbox(spool)
def __init__(self, reader):
    """Read all messages from the local spool file into the database.

    The spool file named by ``sqmail.preferences.get_incomingpath()`` is
    locked with ``lockfile-create``, parsed as a Unix mailbox, and every
    message is stored through ``sqmail.message.Message``.  Progress and
    status lines are reported with ``self.progress`` and ``self.msg``.
    When the run was not aborted and the ``deleteremote`` preference is
    set, the spool file is truncated afterwards.

    The file handle and the lock are released on every exit path,
    including exceptions raised while opening or reading the spool file.
    """
    sqmail.gui.fetcher.Fetcher.__init__(self, reader, "Spool Read")
    filename = sqmail.preferences.get_incomingpath()
    self.msg("Using spool file "+filename)

    self.msg("Locking spool file")
    # NOTE(review): filename is inserted into a shell command unquoted; a
    # path containing spaces or shell metacharacters will not be handled
    # correctly.
    rv = os.system("lockfile-create --retry 1 "+filename)
    if rv:
        self.msg("Failed to lock spool file, aborting")
        self.do_abort()
        return

    spool_empty = False
    try:
        self.msg("Opening spool file")
        fp = open(filename, "r+")
        try:
            # Determine the file size for the progress display.
            fp.seek(0, 2)
            size = fp.tell()
            fp.seek(0, 0)

            if size == 0:
                self.msg("Spool file empty. Aborting.")
                spool_empty = True
            else:
                mbox = mailbox.UnixMailbox(fp)
                count = 0
                self.msg("Reading messages")
                while 1:
                    self.progress(fp.tell(), size)
                    msg = sqmail.message.Message()
                    mboxmsg = mbox.next()
                    # Only None marks the end of the mailbox.  Testing
                    # truthiness could stop at a message without headers
                    # and the truncation below would then discard unread
                    # mail.
                    if mboxmsg is None:
                        break
                    msg.loadfrommessage(mboxmsg)
                    msg.savealltodatabase()
                    count = count + 1
                    if self.abort:
                        self.msg("Aborted!")
                        self.msg("(Duplicate messages remain in spool file.)")
                        break
                self.msg(str(count)+" message(s) read")

                if not self.abort:
                    if sqmail.preferences.get_deleteremote():
                        self.msg("All messages read; truncating spool file")
                        fp.truncate(0)
                    else:
                        self.msg("All messages read. Leaving mail in spool file. "
                                 "(Fetching again will result in duplicate messages "
                                 "in your database.)")

                self.msg("Closing and unlocking spool file")
        finally:
            fp.close()
    finally:
        # Always drop the lock, otherwise later runs cannot lock the spool.
        os.system("lockfile-remove "+filename)

    if spool_empty:
        self.do_abort()
        return

    if not self.abort:
        self.do_abort()