def __init__(self, ix, postlimit=32 * 1024 * 1024, blocklimit=128,
             timeout=0.0, delay=0.1):
    """
    :param ix: the Index object you want to write to.
    :param postlimit: Essentially controls the maximum amount of memory
        the indexer uses at a time, in bytes (the actual amount of memory
        used by the Python process will be much larger because of other
        overhead). The default (32 MB) is a bit small; you may want to
        increase this value for very large collections, e.g.
        ``postlimit=256 * 1024 * 1024``.
    """

    # Take an exclusive write lock on the index before touching anything
    self.lock = ix.storage.lock(ix.indexname + "_LOCK")
    if not try_for(self.lock.acquire, timeout=timeout, delay=delay):
        raise LockError("Index %s is already locked for writing"
                        % ix.indexname)

    self.index = ix
    self.segments = ix.segments.copy()
    self.postlimit = postlimit
    self.blocklimit = blocklimit
    self._segment_writer = None
    self._searcher = ix.searcher()
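
# A minimal usage sketch for the writer above, assuming the index's
# writer() factory forwards keyword arguments to this constructor (as
# Whoosh's file-backed indexes do); the directory name and schema fields
# are illustrative, not taken from the source.

import os

from whoosh import index
from whoosh.fields import Schema, ID, TEXT

schema = Schema(path=ID(stored=True), content=TEXT)
if not os.path.exists("indexdir"):
    os.mkdir("indexdir")
ix = index.create_in("indexdir", schema)

# Give the indexer a 256 MB posting buffer instead of the 32 MB default.
writer = ix.writer(postlimit=256 * 1024 * 1024)
writer.add_document(path=u"/a", content=u"alfa bravo charlie")
writer.commit()  # flushes the buffered postings and releases the lock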
def __init__(self, ix, poolclass=None, timeout=0.0, delay=0.1, _lk=True,
             limitmb=128, docbase=0, codec=None, **kwargs):
    # Lock the index
    self.writelock = None
    if _lk:
        self.writelock = ix.lock("WRITELOCK")
        if not try_for(self.writelock.acquire, timeout=timeout,
                       delay=delay):
            raise LockError

    # Get info from the index
    self.storage = ix.storage
    self.indexname = ix.indexname
    info = ix._read_toc()
    self.generation = info.generation + 1
    self.schema = info.schema
    self.segments = info.segments
    self.docnum = self.docbase = docbase
    self._setup_doc_offsets()

    # Internals
    poolprefix = "whoosh_%s_" % self.indexname
    self.pool = PostingPool(limitmb=limitmb, prefix=poolprefix)
    self.newsegment = Segment(self.indexname, 0)
    self.is_closed = False
    self._added = False

    # Set up writers
    if codec is None:
        from whoosh.codec.standard import StdCodec
        codec = StdCodec(self.storage)
    self.codec = codec
    self.perdocwriter = codec.per_document_writer(self.newsegment)
    self.fieldwriter = codec.field_writer(self.newsegment)
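
# _setup_doc_offsets() is called above but not shown. Judging from the
# equivalent inline loop in the older writer at the end of this section,
# it presumably records, for each existing segment, the base document
# number at which that segment's documents start; a sketch, not the
# confirmed implementation:

def _setup_doc_offsets(self):
    self._doc_offsets = []
    base = 0
    for s in self.segments:
        self._doc_offsets.append(base)
        base += s.doc_count_all()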
def fn():
    lock2 = st.lock("testlock")
    gotit = try_for(lock2.acquire, 1.0, 0.1)
    if gotit:
        result.append(True)
        lock2.release()
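
# In context, fn() is the worker body of a lock-contention test: it polls
# for "testlock" for up to a second and records success in a shared list.
# A hedged reconstruction of the surrounding test harness; the storage
# path is illustrative and the import location of try_for varies between
# Whoosh versions.

import tempfile
import threading
import time

from whoosh.filedb.filestore import FileStorage
from whoosh.util.filelock import try_for  # module path in recent versions

st = FileStorage(tempfile.mkdtemp())
result = []

def fn():
    lock2 = st.lock("testlock")
    gotit = try_for(lock2.acquire, 1.0, 0.1)
    if gotit:
        result.append(True)
        lock2.release()

lock1 = st.lock("testlock")
lock1.acquire()                 # hold the lock in the main thread first
t = threading.Thread(target=fn)
t.start()                       # worker begins polling for the lock
time.sleep(0.15)
lock1.release()                 # a later retry in the worker should succeed
t.join()
assert result == [True]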
def __init__(self, ix, poolclass=None, procs=0, blocklimit=128,
             timeout=0.0, delay=0.1, name=None, _l=True, **poolargs):
    self.writelock = None
    if _l:
        self.writelock = ix.lock("WRITELOCK")
        if not try_for(self.writelock.acquire, timeout=timeout,
                       delay=delay):
            raise LockError
    self.readlock = ix.lock("READLOCK")

    info = ix._read_toc()
    self.schema = info.schema
    self.segments = info.segments
    self.storage = ix.storage
    self.indexname = ix.indexname
    self.is_closed = False

    self.blocklimit = blocklimit
    self.segment_number = info.segment_counter + 1
    self.generation = info.generation + 1

    self._doc_offsets = []
    base = 0
    for s in self.segments:
        self._doc_offsets.append(base)
        base += s.doc_count_all()

    self.name = name or Segment.basename(self.indexname,
                                         self.segment_number)
    self.docnum = 0
    self.fieldlength_totals = defaultdict(int)
    self._added = False
    self._unique_cache = {}

    # Create a temporary segment to use its .*_filename attributes
    segment = Segment(self.name, self.generation, 0, None, None)

    # Terms index
    tf = self.storage.create_file(segment.termsindex_filename)
    ti = TermIndexWriter(tf)
    # Term postings file
    pf = self.storage.create_file(segment.termposts_filename)
    pw = FilePostingWriter(pf, blocklimit=blocklimit)
    # Terms writer
    self.termswriter = TermsWriter(self.schema, ti, pw)

    if self.schema.has_vectored_fields():
        # Vector index
        vf = self.storage.create_file(segment.vectorindex_filename)
        self.vectorindex = TermVectorWriter(vf)

        # Vector posting file
        vpf = self.storage.create_file(segment.vectorposts_filename)
        self.vpostwriter = FilePostingWriter(vpf, stringids=True)
    else:
        self.vectorindex = None
        self.vpostwriter = None

    # Stored fields file
    sf = self.storage.create_file(segment.storedfields_filename)
    self.storedfields = StoredFieldWriter(sf, self.schema.stored_names())

    # Field lengths file
    self.lengthfile = self.storage.create_file(
        segment.fieldlengths_filename)

    # Create the pool
    if poolclass is None:
        if procs > 1:
            from whoosh.filedb.multiproc import MultiPool
            poolclass = MultiPool
        else:
            poolclass = TempfilePool
    self.pool = poolclass(self.schema, procs=procs, **poolargs)
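
# The pool selection at the end is the only place procs matters at
# construction time. A hedged usage sketch, assuming the index's writer()
# factory forwards its keyword arguments to this constructor and reusing
# the illustrative "indexdir" index from the first sketch:

from whoosh import index

ix = index.open_dir("indexdir")

# procs > 1 selects MultiPool, spreading the posting sort/merge across
# worker processes; extra keyword arguments (poolargs) go to the pool.
w = ix.writer(procs=4)
w.add_document(path=u"/a", content=u"alfa bravo charlie")
w.commit()  # merges the pool's runs and releases WRITELOCK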