Ejemplo n.º 1
0
    def _modify_index(self, index, schema, wikiname, revids, mode='add', procs=1, limitmb=256):
        """
        modify index contents - add, update, delete the indexed documents for all given revids

        Note: mode == 'add' is faster but you need to make sure to not create duplicate
              documents in the index.
        """
        if procs == 1:
            # MultiSegmentWriter sometimes has issues and is pointless for procs == 1,
            # so use the simple writer when --procs 1 is given:
            writer = index.writer()
        else:
            writer = MultiSegmentWriter(index, procs, limitmb)
        with writer as writer:
            for revid in revids:
                if mode in ['add', 'update', ]:
                    meta, data = self.backend.retrieve(revid)
                    content = convert_to_indexable(meta, data)
                    doc = backend_to_index(meta, content, schema, wikiname)
                if mode == 'update':
                    writer.update_document(**doc)
                elif mode == 'add':
                    writer.add_document(**doc)
                elif mode == 'delete':
                    writer.delete_by_term(REVID, revid)
                else:
                    raise ValueError("mode must be 'update', 'add' or 'delete', not '%s'" % mode)
Ejemplo n.º 2
0
    def indexer(self, create=True):
        """
        Prepare the Whoosh index and writer for this benchmark.

        The index directory is ``<options.dir>/<options.indexname>_whoosh``
        and is created when it does not exist yet.  The writer ends up in
        ``self.writer``; ``self._procdoc`` is set to the spec's
        ``process_document_whoosh`` attribute, or None if the spec has none.

        :param create: if true, create a new index in the directory,
                       otherwise open the index already stored there
        """
        spec = self.bench.spec
        schema = spec.whoosh_schema()

        opts = self.options
        index_dir = os.path.join(opts.dir, "%s_whoosh" % opts.indexname)
        if not os.path.exists(index_dir):
            os.mkdir(index_dir)

        ix = index.create_in(index_dir, schema) if create else index.open_dir(index_dir)

        # The pool class is optional and is looked up from the option value.
        pool_cls = find_object(opts.pool) if opts.pool else None

        writer_args = {
            "limitmb": int(opts.limitmb),
            "poolclass": pool_cls,
            "dir": opts.tempdir,
            "procs": int(opts.procs),
            "batchsize": int(opts.batch),
        }

        if opts.expw:
            from whoosh.filedb.multiproc import MultiSegmentWriter
            self.writer = MultiSegmentWriter(ix, **writer_args)
        else:
            self.writer = ix.writer(**writer_args)

        if hasattr(spec, "process_document_whoosh"):
            self._procdoc = spec.process_document_whoosh
        else:
            self._procdoc = None
Ejemplo n.º 3
0
	def set_multiSegmentWriter(self, limitmb = 128, procs = 4):
		"""
		Switch this object to a MultiSegmentWriter built on ``self.index``.

		Sets the multi-segment flag and replaces ``self.writer``.

		:param limitmb: forwarded to MultiSegmentWriter as ``limitmb``
		:param procs: forwarded to MultiSegmentWriter as ``procs``
		"""
		self.__isMultiSegment = True
		# Pass by keyword: positionally, limitmb would fill the slot right
		# after the index, which the writer reads as the process count.
		self.writer = MultiSegmentWriter(self.index, procs=procs, limitmb=limitmb)
Ejemplo n.º 4
0
def test_multisegwriter():
    """Run the shared writer check with a four-process MultiSegmentWriter."""
    from whoosh.filedb.multiproc import MultiSegmentWriter

    def make_writer(ix):
        # Factory passed to _check_writer; it receives the index to write to.
        return MultiSegmentWriter(ix, procs=4)

    _check_writer("multisegw", make_writer)