def load_from_file():
    """Import the messages of the configured mbox file in batches.

    All settings are read from ``tornado.options.options``:

    * ``init`` -- when truthy, the index is deleted and recreated first.
    * ``skip`` -- number of leading messages to skip (falsy means none).
    * ``infile`` -- path of the mbox file to import.
    * ``batch_size`` -- number of converted items handed to each
      ``upload_batch`` call.

    Messages for which ``convert_msg_to_json`` returns a falsy value are
    not uploaded. The total logged at the end counts every message read,
    including skipped ones.

    Raises:
        mailbox.NoSuchMailboxError: if ``infile`` does not exist.
    """
    options = tornado.options.options

    if options.init:
        delete_index()
        create_index()

    # Normalise a missing/None value so the comparison below is always int-vs-int.
    skip = options.skip or 0
    if skip:
        logging.info("Skipping first %d messages from mbox file", skip)

    count = 0
    upload_data = []
    logging.info("Starting import from file %s", options.infile)

    # mailbox.mbox exists on both Python 2 and 3 (PortableUnixMailbox is
    # Python 2 only). create=False makes a wrong path fail loudly instead of
    # silently creating an empty mailbox file.
    mbox = mailbox.mbox(options.infile, create=False)

    # NOTE(review): this parser is never used in this function; construction
    # is kept only in case the constructors have side effects -- confirm and
    # remove.
    email_parser = DelegatingEmailParser(
        [AmazonEmailParser(), SteamEmailParser()])

    try:
        for msg in mbox:
            count += 1
            if not count % 100:
                logging.info("Item %d", count)
            # `<=` so that exactly `skip` messages are skipped; the previous
            # `<` skipped only `skip - 1`.
            if count <= skip:
                continue
            item = convert_msg_to_json(msg)
            if item:
                upload_data.append(item)
                if len(upload_data) == options.batch_size:
                    upload_batch(upload_data)
                    upload_data = []
    finally:
        # Release the underlying file handle even if conversion/upload fails.
        mbox.close()

    # Upload whatever is left over after the last full batch.
    if upload_data:
        upload_batch(upload_data)

    logging.info("Import done - total count %d", count)
def load_from_file():
    """Import the messages of the configured mbox file in batches.

    All settings are read from ``tornado.options.options``:

    * ``init`` -- when truthy, the index is deleted and recreated first.
    * ``skip`` -- number of leading messages to skip (falsy means none).
    * ``infile`` -- path of the mbox file to import.
    * ``batch_size`` -- number of converted items handed to each
      ``upload_batch`` call.

    Messages for which ``convert_msg_to_json`` returns a falsy value are
    not uploaded. The total logged at the end counts every message read,
    including skipped ones.

    Raises:
        mailbox.NoSuchMailboxError: if ``infile`` does not exist.
    """
    options = tornado.options.options

    if options.init:
        delete_index()
        create_index()

    # Normalise a missing/None value so the comparison below is always int-vs-int.
    skip = options.skip or 0
    if skip:
        logging.info("Skipping first %d messages from mbox file", skip)

    count = 0
    upload_data = []
    logging.info("Starting import from file %s", options.infile)

    # mailbox.mbox replaces the Python 2 only UnixMailbox classes.
    # create=False makes a wrong path fail loudly instead of silently
    # creating an empty mailbox file.
    mbox = mailbox.mbox(options.infile, create=False)

    # NOTE(review): this parser is never used in this function; construction
    # is kept only in case the constructors have side effects -- confirm and
    # remove.
    email_parser = DelegatingEmailParser(
        [AmazonEmailParser(), SteamEmailParser()])

    try:
        for msg in mbox:
            count += 1
            # `<=` so that exactly `skip` messages are skipped; the previous
            # `<` skipped only `skip - 1`.
            if count <= skip:
                continue
            item = convert_msg_to_json(msg)
            if item:
                upload_data.append(item)
                if len(upload_data) == options.batch_size:
                    upload_batch(upload_data)
                    upload_data = []
    finally:
        # Release the underlying file handle even if conversion/upload fails.
        mbox.close()

    # Upload whatever is left over after the last full batch.
    if upload_data:
        upload_batch(upload_data)

    logging.info("Import done - total count %d", count)