def _modify_index(self, index, schema, wikiname, revids, mode='add', procs=1, limitmb=256):
    """
    Modify index contents - add, update or delete the indexed documents
    for all given revids.

    Note: mode == 'add' is faster but you need to make sure to not create
          duplicate documents in the index.

    :param index: index object; its ``writer()`` is used when procs == 1,
                  otherwise it is handed to MultiSegmentWriter
    :param schema: passed through to backend_to_index
    :param wikiname: passed through to backend_to_index
    :param revids: iterable of revision ids to process
    :param mode: one of 'add', 'update' or 'delete'
    :param procs: 1 selects the simple writer, anything else selects
                  MultiSegmentWriter with that value as ``procs``
    :param limitmb: forwarded to MultiSegmentWriter as ``limitmb``
                    (not used by the simple writer path)
    :raises ValueError: if mode is not 'add', 'update' or 'delete'
    """
    # Reject a bad mode up front, so that no writer is opened for a call
    # that can never succeed and an empty revids list does not hide the error.
    if mode not in ('add', 'update', 'delete'):
        raise ValueError("mode must be 'update', 'add' or 'delete', not '%s'" % mode)

    if procs == 1:
        # MultiSegmentWriter sometimes has issues and is pointless for procs == 1,
        # so use the simple writer when --procs 1 is given:
        writer = index.writer()
    else:
        # Pass by keyword: positionally, the third argument of Whoosh's
        # MultiSegmentWriter is the batch size, not the memory limit.
        # NOTE(review): confirm against the installed Whoosh version.
        writer = MultiSegmentWriter(index, procs=procs, limitmb=limitmb)

    with writer as writer:
        for revid in revids:
            if mode == 'delete':
                writer.delete_by_term(REVID, revid)
                continue
            # 'add' and 'update' both need the document built from the backend
            meta, data = self.backend.retrieve(revid)
            content = convert_to_indexable(meta, data)
            doc = backend_to_index(meta, content, schema, wikiname)
            if mode == 'update':
                writer.update_document(**doc)
            else:
                writer.add_document(**doc)
def indexer(self, create=True):
    """Set up the Whoosh index and writer used by this object.

    The index lives in ``<options.dir>/<options.indexname>_whoosh``; the
    directory is created when it does not exist yet. Depending on
    ``create`` the index is either created there with the schema from
    ``self.bench.spec.whoosh_schema()`` or opened from that directory.

    The writer is stored on ``self.writer``. ``self._procdoc`` is set to the
    spec's ``process_document_whoosh`` attribute when the spec has one,
    otherwise to ``None``.

    :param create: True to create a new index, False to open an existing one
    """
    schema = self.bench.spec.whoosh_schema()
    opts = self.options

    index_dir = os.path.join(opts.dir, "%s_whoosh" % opts.indexname)
    if not os.path.exists(index_dir):
        os.mkdir(index_dir)

    ix = index.create_in(index_dir, schema) if create else index.open_dir(index_dir)

    # Only resolve a pool class when one was configured.
    poolclass = find_object(opts.pool) if opts.pool else None

    writer_args = {
        "limitmb": int(opts.limitmb),
        "poolclass": poolclass,
        "dir": opts.tempdir,
        "procs": int(opts.procs),
        "batchsize": int(opts.batch),
    }

    if opts.expw:
        # Imported lazily: only needed when the expw option is set.
        from whoosh.filedb.multiproc import MultiSegmentWriter
        self.writer = MultiSegmentWriter(ix, **writer_args)
    else:
        self.writer = ix.writer(**writer_args)

    # Optional per-document hook supplied by the benchmark spec.
    self._procdoc = getattr(self.bench.spec, "process_document_whoosh", None)
def set_multiSegmentWriter(self, limitmb=128, procs=4):
    """Replace ``self.writer`` with a MultiSegmentWriter over ``self.index``.

    Also sets the ``__isMultiSegment`` flag on the instance to True.

    :param limitmb: forwarded to MultiSegmentWriter as ``limitmb``
    :param procs: forwarded to MultiSegmentWriter as ``procs``
    """
    self.__isMultiSegment = True
    # Pass by keyword: positionally, Whoosh's MultiSegmentWriter expects
    # (ix, procs, batchsize, ...), so the previous (index, limitmb, procs)
    # call used limitmb as the process count and procs as the batch size.
    # NOTE(review): confirm against the installed Whoosh version.
    self.writer = MultiSegmentWriter(self.index, procs=procs, limitmb=limitmb)
def test_multisegwriter():
    """Run the shared writer check against a MultiSegmentWriter with procs=4."""
    from whoosh.filedb.multiproc import MultiSegmentWriter

    def make_writer(ix):
        # Factory handed to _check_writer; builds the writer for a given index.
        return MultiSegmentWriter(ix, procs=4)

    _check_writer("multisegw", make_writer)