def __init__(self, storage, schema, segment):
    """Open the files belonging to one segment and build its readers.

    :param storage: object used to open the segment's files; must provide
        ``open_file()`` and ``file_exists()``.
    :param schema: schema object; must provide ``has_scorable_fields()``
        and be iterable over fields exposing a ``spelling`` attribute.
    :param segment: segment object supplying the file names and the
        document counts.
    :raises AssertionError: if the segment's total document count differs
        from the length reported by the stored-fields reader (only when
        assertions are enabled).
    """
    self.storage = storage
    self.schema = schema
    self.segment = segment

    # Identify this reader by the segment's own UUID when it has one;
    # otherwise generate a random UUID.
    if not hasattr(self.segment, "uuid"):
        import uuid
        self.uuid_string = str(uuid.uuid4())
    else:
        self.uuid_string = str(self.segment.uuid)

    # Term index
    terms_file = storage.open_file(segment.termsindex_filename)
    self.termsindex = TermIndexReader(terms_file)

    # Term vector index and vector postings are not opened here
    # (lazy load).
    self.vectorindex = None
    self.vpostfile = None

    # Stored fields file
    stored_file = storage.open_file(segment.storedfields_filename,
                                    mapped=False)
    self.storedfields = StoredFieldReader(stored_file)

    # Field length file: only opened when the schema has scorable fields.
    self.fieldlengths = None
    if self.schema.has_scorable_fields():
        lengths_file = storage.open_file(segment.fieldlengths_filename)
        self.fieldlengths = LengthReader(lengths_file,
                                         segment.doc_count_all())

    # Copy info from the underlying segment
    self._has_deletions = segment.has_deletions()
    self._doc_count = segment.doc_count()

    # Postings file
    self.postfile = self.storage.open_file(segment.termposts_filename,
                                           mapped=False)

    # Dawg file: loaded only when some field has spelling enabled and the
    # file actually exists in storage.
    self.dawg = None
    if any(fieldobj.spelling for fieldobj in self.schema):
        dawg_name = segment.dawg_filename
        if self.storage.file_exists(dawg_name):
            dawg_file = self.storage.open_file(dawg_name, mapped=False)
            self.dawg = DiskNode.load(dawg_file, expand=False)

    # Sanity check: total document count must match the stored fields.
    self.dc = segment.doc_count_all()
    assert self.dc == self.storedfields.length

    self.set_caching_policy()

    self.is_closed = False
    self._sync_lock = Lock()
def word_graph(self, segment):
    """Load the DAWG node stored for ``segment``.

    Opens the segment's file with extension ``self.DAWG_EXT`` from
    ``self.storage`` (``mapped=False``) and returns the result of
    ``DiskNode.load`` on it with ``expand=False``.

    :param segment: segment object providing ``open_file()``.
    """
    return DiskNode.load(
        segment.open_file(self.storage, self.DAWG_EXT, mapped=False),
        expand=False,
    )