def prethread_index_factory_new():
    """Thread all raw mail into conversations and queue the resulting docs.

    Runs these stages in order, printing a timestamped banner before each
    and the elapsed wall-clock seconds after it:

    1. wrap every item of ``mail_grab.iteritems()`` with ``msg_factory``;
    2. wrap each of those with ``conv_factory``;
    3. feed them to a fresh ``lazythread_container`` via ``thread()``;
    4. call ``_ensure_threading_integrity(threader, True)``;
    5. pass every doc it yields to ``xconn.replace`` and then call
       ``xconn.flush()``.

    Returns nothing; the results are the calls made on ``xconn``.
    """
    threader = lazythread_container()

    print('%s - creating msg containers from raw mail' % datetime.now())
    t = time.time()
    # This is a generator expression: nothing is built until it is consumed,
    # so the time reported for this stage covers only creating the generator.
    all_msgs = (msg_factory(x) for x in mail_grab.iteritems())
    # Disabled alternative kept from the original:
    # all_msgs = forkmap.map(msg_factory, mail_grab.iteritems())
    t = time.time() - t
    print("done! took %r seconds" % t)

    print('%s - building conversation objects' % datetime.now())
    t = time.time()
    # Also lazy; chained on top of the generator above.
    all_msgs = (conv_factory(x) for x in all_msgs)
    t = time.time() - t
    print("done! took %r seconds" % t)

    print('%s - threading messages into conversations' % datetime.now())
    t = time.time()
    # NOTE(review): presumably thread() is what consumes the two lazy stages
    # above, so its timing would include the factory work -- confirm.
    threader.thread(all_msgs)
    t = time.time() - t
    print("done! took %r seconds" % t)

    print('%s - running integrity checker' % datetime.now())
    t = time.time()
    docs = _ensure_threading_integrity(threader, True)
    t = time.time() - t
    print("done! took %r seconds" % t)

    print('%s - queueing docs' % datetime.now())
    t = time.time()
    # Explicit loop instead of map(): the calls are made for their side
    # effect only, so there is no reason to collect the return values.
    for doc in docs:
        xconn.replace(doc)
    xconn.flush()
    t = time.time() - t
    print("done! took %r seconds" % t)

    # NOTE(review): nothing in this function waits after this message; any
    # waiting must happen in the caller or inside xconn -- confirm.
    print("%s - waiting for work to finish" % datetime.now())
def _ensure_threading_integrity(threader=None, all_new=False):
    """Reconcile each message's thread id with its conversation's thread id.

    Walks every conversation in ``threader`` and compares ``conv.thread``
    with the ``thread`` attribute of each message in ``conv.messages``:

    * messages with a falsy ``thread`` are queued for ``update_existing``;
    * messages whose ``thread`` differs from the conversation's are queued
      for ``replace_existing``;
    * messages that already match are left alone.

    Args:
        threader: An already-populated conversation container. When ``None``,
            a new ``lazythread_container`` is created and filled from
            ``iterdocs()``.
        all_new: Passed through unchanged as the third argument of
            ``modify_factory``.

    Returns:
        An iterator yielding everything produced by ``modify_factory`` for
        the update queue, followed by everything it produces for the
        replace queue.
    """
    import itertools

    # Compare against None explicitly: a caller-supplied container that
    # happens to test falsy must not be silently swapped for a new one.
    if threader is None:
        threader = lazythread_container()
        all_msgs = (msg_factory(x) for x in iterdocs())
        # Disabled alternative kept from the original:
        # all_msgs = (msg_factory(x) for x in iterdocs(safe=True))
        all_msgs = (conv_factory(x) for x in all_msgs)
        threader.thread(all_msgs)

    to_update = []
    to_replace = []
    for conv in threader:
        ctid = conv.thread
        for msg in conv.messages:
            # Optimization from the original: queue the message container
            # itself rather than msg.muuid so it need not be rebuilt later.
            id_data_tple = (msg, [('thread', ctid)])
            if not msg.thread:
                to_update.append(id_data_tple)
            elif ctid != msg.thread:
                to_replace.append(id_data_tple)

    print("in update queue %i" % len(to_update))
    print("in replace queue %i" % len(to_replace))

    print('%s - starting modify factory on to_update' % datetime.now())
    docs1 = modify_factory(to_update, update_existing, all_new)
    print('%s - starting modify factory on to_replace' % datetime.now())
    docs2 = modify_factory(to_replace, replace_existing, all_new)

    # Yield all of docs1, then all of docs2, without materializing either.
    return itertools.chain(docs1, docs2)