예제 #1
0
def load_from_file():
    """Import the messages of the configured mbox file in batches.

    All settings are read from ``tornado.options.options``:

    * ``init`` -- when truthy, ``delete_index()`` runs before
      ``create_index()``.
    * ``skip`` -- number of leading messages to skip (falsy: skip none).
    * ``infile`` -- path of the mbox file to read.
    * ``batch_size`` -- number of converted items handed to each
      ``upload_batch()`` call.

    Every non-skipped message is passed to ``convert_msg_to_json()``;
    falsy results are dropped, the rest are uploaded through
    ``upload_batch()``. Progress is logged every 100 messages, and the
    final log line reports the number of messages read, skipped ones
    included.
    """
    options = tornado.options.options

    if options.init:
        delete_index()
    create_index()

    # A falsy ``skip`` (0 or None) means "skip nothing"; normalising it
    # here keeps the comparison in the loop well-defined.
    skip = options.skip or 0
    if skip:
        logging.info("Skipping first %d messages from mbox file", skip)

    count = 0
    upload_data = []
    logging.info("Starting import from file %s", options.infile)

    # The mailbox reads lazily from the file object, so the whole loop has
    # to run while the file is open; ``with`` guarantees it is closed even
    # if conversion or upload raises.
    with open(options.infile, 'rb') as infile:
        # NOTE: PortableUnixMailbox is a legacy Python 2 class; it does not
        # exist in Python 3's ``mailbox`` module.
        mbox = mailbox.PortableUnixMailbox(infile, email.message_from_file)

        for msg in mbox:
            count += 1

            if count % 100 == 0:
                logging.info("Item %d", count)

            # ``count`` is 1-based, so ``<=`` skips exactly the first
            # ``skip`` messages, matching the log message above.
            if count <= skip:
                continue

            item = convert_msg_to_json(msg)
            if not item:
                continue

            upload_data.append(item)
            if len(upload_data) == options.batch_size:
                upload_batch(upload_data)
                upload_data = []

    # Upload whatever is left over from the last, partially filled batch.
    if upload_data:
        upload_batch(upload_data)

    logging.info("Import done - total count %d", count)
def load_from_file():
    """Import the messages of the configured mbox file in batches.

    All settings are read from ``tornado.options.options``:

    * ``init`` -- when truthy, ``delete_index()`` runs before
      ``create_index()``.
    * ``skip`` -- number of leading messages to skip (falsy: skip none).
    * ``infile`` -- path of the mbox file to read.
    * ``batch_size`` -- number of converted items handed to each
      ``upload_batch()`` call.

    Every non-skipped message is passed to ``convert_msg_to_json()``;
    falsy results are dropped, the rest are uploaded through
    ``upload_batch()``. The final log line reports the number of messages
    read, skipped ones included.

    Raises:
        mailbox.NoSuchMailboxError: if ``infile`` does not exist.
    """
    options = tornado.options.options

    if options.init:
        delete_index()
    create_index()

    # A falsy ``skip`` (0 or None) means "skip nothing"; normalising it
    # here avoids comparing an int with None inside the loop.
    skip = options.skip or 0
    if skip:
        logging.info("Skipping first %d messages from mbox file", skip)

    count = 0
    upload_data = []
    logging.info("Starting import from file %s", options.infile)

    # ``create=False``: by default mailbox.mbox silently creates an empty
    # file when the path does not exist, which would turn a mistyped
    # ``infile`` into a "successful" import of zero messages.
    mbox = mailbox.mbox(options.infile, create=False)
    try:
        for msg in mbox:
            count += 1

            # ``count`` is 1-based, so ``<=`` skips exactly the first
            # ``skip`` messages, matching the log message above.
            if count <= skip:
                continue

            item = convert_msg_to_json(msg)
            if not item:
                continue

            upload_data.append(item)
            if len(upload_data) == options.batch_size:
                upload_batch(upload_data)
                upload_data = []
    finally:
        # Release the underlying file handle even if conversion or upload
        # raises part-way through.
        mbox.close()

    # Upload whatever is left over from the last, partially filled batch.
    if upload_data:
        upload_batch(upload_data)

    logging.info("Import done - total count %d", count)