Example #1
0
def prethread_index_factory_new():
    """
    Build message containers from raw mail, wrap them in conversation
    objects, thread them, run the integrity checker and queue the
    resulting docs on ``xconn``.

    Progress messages and per-step wall-clock timings are printed to
    stdout. Returns nothing.
    """
    def report_done(started):
        # Shared "step finished" line with the elapsed wall-clock time.
        print("done! took %r seconds" % (time.time() - started))

    threader = lazythread_container()

    # NOTE: the next two steps only create generator expressions; no
    # message or conversation object is built until the generator is
    # consumed (it is handed to threader.thread() below). The timings
    # printed for these two steps therefore cover generator creation only.
    print('%s - creating msg containers from raw mail' % datetime.now())
    started = time.time()
    all_msgs = (msg_factory(x) for x in mail_grab.iteritems())
    report_done(started)

    print('%s - building conversation objects' % datetime.now())
    started = time.time()
    all_msgs = (conv_factory(x) for x in all_msgs)
    report_done(started)

    print('%s - threading messages into conversations' % datetime.now())
    started = time.time()
    threader.thread(all_msgs)
    report_done(started)

    print('%s - running integrity checker' % datetime.now())
    started = time.time()
    docs = _ensure_threading_integrity(threader, True)
    report_done(started)

    print('%s - queueing docs' % datetime.now())
    started = time.time()
    # Explicit loop instead of map(): the calls are made for their side
    # effect only, so there is no reason to collect the return values.
    for doc in docs:
        xconn.replace(doc)
    xconn.flush()
    report_done(started)
    print("%s - waiting for work to finish" % datetime.now())
Example #2
0
def make_doc(msg, threader=None):
    '''
    Build xapian document from a msg_container or from a MaildirMessage

    msg      -- a msg_container, or any value accepted by msg_factory.
                When it is a tuple whose second element is a
                MaildirMessage, that message is also handed to _make_doc
                as ``srcmesg``.
    threader -- optional; when given, the message container is passed to
                ``threader.thread`` before the document is built.

    Returns the populated xappy.UnprocessedDocument.
    '''
    srcmesg = None
    # Exact type checks are kept on purpose so that the set of inputs
    # routed through msg_factory is unchanged.
    if type(msg) is not msg_container:
        if type(msg) is tuple and isinstance(msg[1], MaildirMessage):
            srcmesg = msg[1]
        msg = msg_factory(msg)
    # Compare against None explicitly so a supplied threader is always
    # used, even if its type evaluates as false (e.g. when empty).
    if threader is not None:
        threader.thread([msg])
    doc = xappy.UnprocessedDocument()
    # Plain loop rather than map(): _make_doc is called once per field for
    # its effect on ``doc``; the return values are not needed.
    for field in msg_fields:
        _make_doc(doc, msg, field, srcmesg=srcmesg)
    return doc
Example #3
0
def _ensure_threading_integrity(threader=None, all_new=False):
    """
    Compare every message's ``thread`` value with that of the conversation
    holding it and build docs for the messages that disagree.

    threader -- iterable of conversations (each exposing ``thread`` and
                ``messages``). When None, a lazythread_container is
                created and filled from iterdocs().
    all_new  -- passed through unchanged to modify_factory.

    Returns an iterator yielding the result of modify_factory for the
    update queue followed by the result for the replace queue.
    """
    from itertools import chain

    # Compare against None explicitly so a threader supplied by the caller
    # is always used, even if its type evaluates as false (e.g. when
    # empty), instead of being replaced by a rebuild from iterdocs().
    if threader is None:
        threader = lazythread_container()
        all_msgs = (msg_factory(x) for x in iterdocs())
        all_msgs = (conv_factory(x) for x in all_msgs)
        threader.thread(all_msgs)

    to_update = []
    to_replace = []

    for conv in threader:
        ctid = conv.thread
        for msg in conv.messages:
            # optimization: pass msg_container so we don't have to rebuild
            # it again
            id_data_tple = (msg, [('thread', ctid)])
            if not msg.thread:
                # Message has no thread value yet.
                to_update.append(id_data_tple)
            elif ctid != msg.thread:
                # Message carries a thread value that differs from its
                # conversation's.
                to_replace.append(id_data_tple)

    print("in update queue  %i" % len(to_update))
    print("in replace queue %i" % len(to_replace))
    print('%s - starting modify factory on to_update' % datetime.now())
    docs1 = modify_factory(to_update, update_existing, all_new)
    print('%s - starting modify factory on to_replace' % datetime.now())
    docs2 = modify_factory(to_replace, replace_existing, all_new)

    # chain() yields everything from docs1, then everything from docs2, and
    # accepts any iterable (not only objects with a .next() method).
    return chain(docs1, docs2)