Esempio n. 1
0
    def __init__(self,
                 ix,
                 postlimit=32 * 1024 * 1024,
                 blocklimit=128,
                 timeout=0.0,
                 delay=0.1):
        """
        Lock the index for writing and record the writer's settings.

        :param ix: the Index object you want to write to.
        :param postlimit: Essentially controls the maximum amount of memory the
            indexer uses at a time, in bytes (the actual amount of memory used
            by the Python process will be much larger because of other
            overhead). The default (32MB) is a bit small. You may want to
            increase this value for very large collections, e.g.
            ``postlimit=256*1024*1024``.
        :param blocklimit: stored unchanged on ``self.blocklimit``.
        :param timeout: forwarded to ``try_for`` when acquiring the write lock
            (presumably how long to keep retrying -- confirm against
            ``try_for``).
        :param delay: forwarded to ``try_for`` when acquiring the write lock
            (presumably the pause between attempts -- confirm against
            ``try_for``).
        :raises LockError: if the index's write lock could not be acquired.
        """

        self.lock = ix.storage.lock(ix.indexname + "_LOCK")
        if not try_for(self.lock.acquire, timeout=timeout, delay=delay):
            # The message has a %s placeholder; fill it in with the index name
            # so the error actually identifies which index is locked.
            raise LockError("Index %s is already locked for writing"
                            % ix.indexname)

        self.index = ix
        # Work on a copy so the index's own segment collection is not touched
        # by changes made through this writer.
        self.segments = ix.segments.copy()
        self.postlimit = postlimit
        self.blocklimit = blocklimit
        self._segment_writer = None
        self._searcher = ix.searcher()
Esempio n. 2
0
    def __init__(self, ix, poolclass=None, timeout=0.0, delay=0.1, _lk=True,
                 limitmb=128, docbase=0, codec=None, **kwargs):
        """Prepare a writer for the index ``ix``.

        :param ix: the index to write to; its storage, name and table of
            contents are read here.
        :param poolclass: accepted but not used in this constructor body.
        :param timeout: forwarded to ``try_for`` when acquiring the write lock.
        :param delay: forwarded to ``try_for`` when acquiring the write lock.
        :param _lk: when true, obtain the "WRITELOCK" lock from ``ix`` and
            acquire it; when false, ``self.writelock`` stays ``None``.
        :param limitmb: forwarded to ``PostingPool`` as ``limitmb``.
        :param docbase: initial value for both ``self.docnum`` and
            ``self.docbase``.
        :param codec: codec object used to build the per-document and field
            writers; when ``None`` a ``StdCodec`` on the index storage is
            created.
        :param kwargs: accepted but not used in this constructor body.
        :raises LockError: if ``_lk`` is true and the lock cannot be acquired.
        """
        # --- Write lock ---------------------------------------------------
        self.writelock = None
        if _lk:
            lock = ix.lock("WRITELOCK")
            self.writelock = lock
            acquired = try_for(lock.acquire, timeout=timeout, delay=delay)
            if not acquired:
                raise LockError

        # --- State copied from the index and its table of contents --------
        self.storage = ix.storage
        self.indexname = ix.indexname
        toc = ix._read_toc()
        # This writer produces the generation after the one currently on disk
        self.generation = toc.generation + 1
        self.schema = toc.schema
        self.segments = toc.segments
        self.docnum = docbase
        self.docbase = docbase
        self._setup_doc_offsets()

        # --- Internal bookkeeping -----------------------------------------
        self.pool = PostingPool(limitmb=limitmb,
                                prefix="whoosh_%s_" % self.indexname)
        self.newsegment = Segment(self.indexname, 0)
        self.is_closed = False
        self._added = False

        # --- Codec and the writers it provides ----------------------------
        if codec is None:
            # Imported lazily, only when the caller did not supply a codec
            from whoosh.codec.standard import StdCodec
            codec = StdCodec(self.storage)
        self.codec = codec
        new_segment = self.newsegment
        self.perdocwriter = codec.per_document_writer(new_segment)
        self.fieldwriter = codec.field_writer(new_segment)
Esempio n. 3
0
 def __init__(self, ix, postlimit=32 * 1024 * 1024, blocklimit=128,
              timeout=0.0, delay=0.1):
     """
     Lock the index for writing and record the writer's settings.

     :param ix: the Index object you want to write to.
     :param postlimit: Essentially controls the maximum amount of memory the
         indexer uses at a time, in bytes (the actual amount of memory used by
         the Python process will be much larger because of other overhead).
         The default (32MB) is a bit small. You may want to increase this value
         for very large collections, e.g. ``postlimit=256*1024*1024``.
     :param blocklimit: stored unchanged on ``self.blocklimit``.
     :param timeout: forwarded to ``try_for`` when acquiring the write lock
         (presumably how long to keep retrying -- confirm against
         ``try_for``).
     :param delay: forwarded to ``try_for`` when acquiring the write lock
         (presumably the pause between attempts -- confirm against
         ``try_for``).
     :raises LockError: if the index's write lock could not be acquired.
     """

     self.lock = ix.storage.lock(ix.indexname + "_LOCK")
     if not try_for(self.lock.acquire, timeout=timeout, delay=delay):
         # The message has a %s placeholder; fill it in with the index name
         # so the error actually identifies which index is locked.
         raise LockError("Index %s is already locked for writing"
                         % ix.indexname)

     self.index = ix
     # Work on a copy so the index's own segment collection is not touched
     # by changes made through this writer.
     self.segments = ix.segments.copy()
     self.postlimit = postlimit
     self.blocklimit = blocklimit
     self._segment_writer = None
     self._searcher = ix.searcher()
Esempio n. 4
0
 def fn():
     # Ask the enclosing scope's storage ``st`` for the lock named "testlock"
     # and try to acquire it via try_for (the two positional arguments are
     # presumably the timeout and the retry delay -- confirm against try_for).
     contender = st.lock("testlock")
     if not try_for(contender.acquire, 1.0, 0.1):
         # Could not get the lock: record nothing.
         return
     # Success: note it in the enclosing scope's ``result`` list, then give
     # the lock back.
     result.append(True)
     contender.release()
Esempio n. 5
0
    def __init__(self, ix, poolclass=None, procs=0, blocklimit=128,
                 timeout=0.0, delay=0.1, name=None, _l=True, **poolargs):
        """Lock ``ix``, read its table of contents and open the files for a
        new segment.

        :param ix: the index to write to.
        :param poolclass: class used to build ``self.pool``. When ``None``,
            ``MultiPool`` is used if ``procs > 1``, otherwise
            ``TempfilePool``.
        :param procs: forwarded to the pool as ``procs``; also selects the
            default pool class.
        :param blocklimit: stored on ``self.blocklimit`` and forwarded to the
            term postings ``FilePostingWriter``.
        :param timeout: forwarded to ``try_for`` when acquiring the write lock.
        :param delay: forwarded to ``try_for`` when acquiring the write lock.
        :param name: name for the new segment; when falsy, one is derived with
            ``Segment.basename`` from the index name and segment number.
        :param _l: when true, obtain the "WRITELOCK" lock from ``ix`` and
            acquire it; when false, ``self.writelock`` stays ``None``.
        :param poolargs: extra keyword arguments forwarded to the pool class.
        :raises LockError: if ``_l`` is true and the lock cannot be acquired.
        """
        # --- Locks --------------------------------------------------------
        self.writelock = None
        if _l:
            wlock = ix.lock("WRITELOCK")
            self.writelock = wlock
            acquired = try_for(wlock.acquire, timeout=timeout, delay=delay)
            if not acquired:
                raise LockError
        # The read lock object is only created here, not acquired.
        self.readlock = ix.lock("READLOCK")

        # --- State taken from the index's table of contents ---------------
        toc = ix._read_toc()
        schema = self.schema = toc.schema
        self.segments = toc.segments
        storage = self.storage = ix.storage
        self.indexname = ix.indexname
        self.is_closed = False

        self.blocklimit = blocklimit
        self.segment_number = toc.segment_counter + 1
        self.generation = toc.generation + 1

        # Offset of each existing segment = sum of doc_count_all() of the
        # segments that come before it.
        offsets = self._doc_offsets = []
        running_total = 0
        for existing in self.segments:
            offsets.append(running_total)
            running_total += existing.doc_count_all()

        if name:
            self.name = name
        else:
            self.name = Segment.basename(self.indexname, self.segment_number)
        self.docnum = 0
        self.fieldlength_totals = defaultdict(int)
        self._added = False
        self._unique_cache = {}

        # Throwaway Segment, built only to read its *_filename attributes
        scratch = Segment(self.name, self.generation, 0, None, None)
        create = storage.create_file

        # --- Terms: index file + postings file, combined by a TermsWriter -
        terms_index = TermIndexWriter(create(scratch.termsindex_filename))
        terms_posts = FilePostingWriter(create(scratch.termposts_filename),
                                        blocklimit=blocklimit)
        self.termswriter = TermsWriter(schema, terms_index, terms_posts)

        # --- Vectors: only opened when the schema has vectored fields -----
        self.vectorindex = None
        self.vpostwriter = None
        if schema.has_vectored_fields():
            vector_file = create(scratch.vectorindex_filename)
            self.vectorindex = TermVectorWriter(vector_file)

            vector_posts_file = create(scratch.vectorposts_filename)
            self.vpostwriter = FilePostingWriter(vector_posts_file,
                                                 stringids=True)

        # --- Stored fields and field lengths ------------------------------
        stored_file = create(scratch.storedfields_filename)
        self.storedfields = StoredFieldWriter(stored_file,
                                              schema.stored_names())
        self.lengthfile = create(scratch.fieldlengths_filename)

        # --- Pool ---------------------------------------------------------
        pool_factory = poolclass
        if pool_factory is None and procs > 1:
            # Imported lazily: only needed for multi-process indexing
            from whoosh.filedb.multiproc import MultiPool
            pool_factory = MultiPool
        elif pool_factory is None:
            pool_factory = TempfilePool
        self.pool = pool_factory(schema, procs=procs, **poolargs)
Esempio n. 6
0
 def fn():
     # Ask the enclosing scope's storage ``st`` for the lock named "testlock"
     # and try to acquire it via try_for (the two positional arguments are
     # presumably the timeout and the retry delay -- confirm against try_for).
     second_lock = st.lock("testlock")
     acquired = try_for(second_lock.acquire, 1.0, 0.1)
     if not acquired:
         # Could not get the lock: record nothing.
         return
     # Success: note it in the enclosing scope's ``result`` list, then give
     # the lock back.
     result.append(True)
     second_lock.release()