예제 #1
0
파일: xindex.py 프로젝트: dlobue/nara
def prethread_index_factory_new():
    """Thread every message from ``mail_grab`` and queue the resulting docs.

    The work is split into stages; each one prints a timestamped banner
    and then the wall-clock seconds it took:

    1. wrap every ``mail_grab.iteritems()`` entry with ``msg_factory``;
    2. wrap every result of stage 1 with ``conv_factory``;
    3. hand the pipeline to a fresh ``lazythread_container`` via ``thread``;
    4. run ``_ensure_threading_integrity(threader, True)``;
    5. pass each returned doc to ``xconn.replace`` and call ``xconn.flush``.

    Stages 1 and 2 only build generator expressions, so the time printed
    for them covers generator creation alone; the factories run when the
    pipeline is consumed later (presumably inside ``threader.thread``).
    """
    threader = lazythread_container()

    # Stage 1: lazy message containers.
    print('%s - creating msg containers from raw mail' % datetime.now())
    started = time.time()
    pipeline = (msg_factory(raw) for raw in mail_grab.iteritems())
    print("done! took %r seconds" % (time.time() - started))

    # Stage 2: lazy conversation objects layered on top of stage 1.
    print('%s - building conversation objects' % datetime.now())
    started = time.time()
    pipeline = (conv_factory(message) for message in pipeline)
    print("done! took %r seconds" % (time.time() - started))

    # Stage 3: threading.
    print('%s - threading messages into conversations' % datetime.now())
    started = time.time()
    threader.thread(pipeline)
    print("done! took %r seconds" % (time.time() - started))

    # Stage 4: integrity check; yields the docs that need to be written.
    print('%s - running integrity checker' % datetime.now())
    started = time.time()
    docs = _ensure_threading_integrity(threader, True)
    print("done! took %r seconds" % (time.time() - started))

    # Stage 5: queue each doc, then flush the connection.
    print('%s - queueing docs' % datetime.now())
    started = time.time()
    replace = xconn.replace
    for doc in docs:
        replace(doc)
    xconn.flush()
    print("done! took %r seconds" % (time.time() - started))
    print("%s - waiting for work to finish" % datetime.now())
예제 #2
0
파일: xindex.py 프로젝트: dlobue/nara
def _preindex_thread(msgs):
    """Thread not-yet-threaded messages together with the indexed docs.

    Args:
        msgs: message containers without a thread id. The object is
            iterated more than once and passed to ``len()``, so it must
            be a re-iterable sized collection rather than a one-shot
            iterator.

    Returns:
        A ``(msgs, threader)`` tuple: the ``msgs`` object that was passed
        in and the ``lazythread_container`` that was populated here. When
        ``all_msgs`` is truthy, each message whose conversation could be
        found has that conversation's thread id appended to ``msg.thread``.

    Raises:
        ValueError: if any message already has a truthy ``thread``.
    """
    # Index the incoming messages by their first message id.
    tracker = {}
    for msg in msgs:
        if msg.thread:
            raise ValueError('This is weird. why am i trying to thread something that already has a thread id?')
        tracker[msg.msgid[0]] = msg

    threader = lazythread_container()
    all_msgs = iterdocs()
    # NOTE(review): if iterdocs() returns a generator this test is always
    # true, even when nothing is indexed yet -- confirm against iterdocs.
    if all_msgs:
        print('yes to all_msgs')
        all_msgs = map(msg_factory, all_msgs)
        all_msgs = threadmap.map(conv_factory, all_msgs)
        threader.thread(all_msgs)
    print('threader b4 convs %s' % len(threader))
    threader.thread(threadmap.map(conv_factory, msgs))
    print('threader after convs %s' % len(threader))

    # Diagnostics: single-message conversations and total threaded messages.
    singletons = 0
    threaded_total = 0
    for conv in threader:
        size = len(conv.messages)
        threaded_total += size
        if size == 1:
            singletons += 1

    print("found %s threads with one msg" % singletons)
    print("threader contains %s messages out of a total %s" % (threaded_total, len(msgs)))

    unmatched = 0
    if all_msgs:
        for key, msg in tracker.items():
            # Look the conversation up by message id first, then by subject.
            # `except Exception` keeps the best-effort behaviour but no
            # longer swallows KeyboardInterrupt / SystemExit.
            try:
                conv = threader[key]
            except Exception:
                try:
                    conv = threader[msg.subject[0]]
                except Exception:
                    unmatched += 1
                    continue
            msg.thread.extend(conv.thread)
        print("%s messages didn't find a thread" % unmatched)
    return msgs, threader
예제 #3
0
파일: xindex.py 프로젝트: dlobue/nara
def _ensure_threading_integrity(threader=None, all_new=False):
    """Find messages whose thread id disagrees with their conversation.

    Args:
        threader: container of conversations to check. When it is falsy, a
            new ``lazythread_container`` is built from ``iterdocs()``.
        all_new: forwarded unchanged as the third argument of both
            ``modify_factory`` calls.

    Returns:
        An iterator that yields the results of
        ``modify_factory(to_update, update_existing, all_new)`` followed by
        those of ``modify_factory(to_replace, replace_existing, all_new)``.
        ``to_update`` holds messages with no thread id; ``to_replace``
        holds messages whose thread id differs from the conversation's.
    """
    from itertools import chain

    # NOTE(review): a caller-supplied container that is empty is also falsy
    # (assuming it defines __len__) and would be rebuilt from iterdocs()
    # here -- confirm whether `threader is None` was intended.
    if not threader:
        threader = lazythread_container()
        all_msgs = (msg_factory(x) for x in iterdocs())
        all_msgs = (conv_factory(x) for x in all_msgs)
        threader.thread(all_msgs)

    to_update = []
    to_replace = []

    for conv in threader:
        ctid = conv.thread
        for msg in conv.messages:
            # Optimization: pass the message container itself so it does
            # not have to be rebuilt again downstream.
            id_data_tple = (msg, [('thread', ctid)])
            if not msg.thread:
                to_update.append(id_data_tple)
            elif ctid != msg.thread:
                to_replace.append(id_data_tple)

    print("in update queue  %i" % len(to_update))
    print("in replace queue %i" % len(to_replace))
    print('%s - starting modify factory on to_update' % datetime.now())
    docs1 = modify_factory(to_update, update_existing, all_new)
    print('%s - starting modify factory on to_replace' % datetime.now())
    docs2 = modify_factory(to_replace, replace_existing, all_new)

    # Lazily yield everything from docs1, then everything from docs2.
    return chain(docs1, docs2)