def from_file(cls, postfile, postingsize, stringids=False):
    """Build a block from the header found at ``postfile``'s current position.

    The fixed-size header is decoded with ``cls._struct``; a trailing
    "maximum ID" value follows it and is read either with ``load`` (string
    IDs) or with ``postfile.read_uint()`` (numeric IDs).

    :param cls: class to instantiate; must expose a ``_struct`` with
        ``size`` and ``unpack``.
    :param postfile: file-like object positioned at the start of a block
        header; must support ``tell``, ``read`` and ``read_uint``.
    :param postingsize: passed through unchanged to the ``cls`` constructor.
    :param stringids: when true, the maximum ID is read with ``load``
        instead of ``read_uint``.
    :returns: the new block, with ``dataoffset`` set to the file position
        immediately after the header and maximum ID.
    """
    block_start = postfile.tell()

    # Decode the fixed-size header record that opens the block.
    fields = cls._struct.unpack(postfile.read(cls._struct.size))

    # Slots 7, 8, 11 and 12 of the header feed the base constructor.
    block = cls(
        postfile,
        postingsize,
        stringids=stringids,
        maxweight=fields[7],
        maxwol=fields[8],
        maxlength=fields[11],
        minlength=fields[12],
    )

    # The type code is decoded to text only when the PY3 flag is set.
    typecode = fields[3]
    if PY3:
        typecode = typecode.decode('latin-1')

    # Remaining header slots are specific to this block implementation.
    # Slot 0 is an offset relative to where the header started.
    block.nextoffset = block_start + fields[0]
    block.compression = fields[1]
    block.postcount = fields[2]
    block.typecode = typecode
    block.idslen = fields[5]
    block.weightslen = fields[6]

    # The "maximum ID" trails the struct; its encoding depends on the ID kind.
    block.maxid = load(postfile) if stringids else postfile.read_uint()

    # Everything after this point in the file is block data.
    block.dataoffset = postfile.tell()
    return block
def _add_file(self, args):
    """Replay serialized documents from a temporary file into ``self.writer``.

    :param args: a ``(filename, length)`` pair; ``length`` is the number of
        records to read from ``filename``.

    Each record is read with ``load`` and must be a mapping, since it is
    expanded as keyword arguments to ``self.writer.add_document``.

    The file is always closed, even when reading or indexing a record
    raises. It is deleted only after every record was added successfully,
    so a failed run leaves the file on disk.
    """
    writer = self.writer
    filename, length = args

    # NOTE(review): ``load`` is presumably pickle's loader -- only feed this
    # files written by this process, never untrusted input.
    with open(filename, "rb") as f:
        for _ in xrange(length):
            writer.add_document(**load(f))

    # Reached only when the whole file was replayed without error.
    os.remove(filename)
def _read_run(self, path):
    """Yield every serialized item stored in the run at ``path``.

    The run is opened with ``self._open_run(path)`` and items are read one
    at a time with ``load`` until it raises ``EOFError``, which ends the
    iteration. When the generator finishes for any reason (exhaustion, an
    error, or being closed early) the handle is closed and
    ``self._remove_run(path)`` is called.
    """
    run_file = self._open_run(path)
    try:
        while True:
            item = load(run_file)
            yield item
    except EOFError:
        # End of the run: fall through to the cleanup below.
        pass
    finally:
        run_file.close()
        self._remove_run(path)
def _read_run(path):
    """Yield every serialized item stored in the file at ``path``.

    Items are read one at a time with ``load`` until it raises
    ``EOFError``, which ends the iteration. When the generator finishes for
    any reason (exhaustion, an error, or being closed early) the file is
    closed and then deleted from disk.
    """
    import os

    run_file = open(path, "rb")
    try:
        while True:
            item = load(run_file)
            yield item
    except EOFError:
        # End of the file: fall through to the cleanup below.
        pass
    finally:
        run_file.close()
        os.remove(path)
def _add_file(self, filename, length):
    """Replay ``length`` serialized records from ``filename`` into ``self.subpool``.

    Each record is read with ``load`` and must unpack to ``(code, args)``.
    The code selects which subpool method receives ``*args``:

    * ``0`` -> ``add_content``
    * ``1`` -> ``add_posting``
    * ``2`` -> ``add_field_length``

    Records with any other code are skipped silently.

    The file is always closed, even when reading or dispatching a record
    raises. It is deleted only after every record was replayed
    successfully, so a failed run leaves the file on disk.

    :param filename: path of the file to replay.
    :param length: number of records to read from the file.
    """
    subpool = self.subpool

    # NOTE(review): ``load`` is presumably pickle's loader -- only feed this
    # files written by this process, never untrusted input.
    with open(filename, "rb") as f:
        for _ in xrange(length):
            code, args = load(f)
            if code == 0:
                subpool.add_content(*args)
            elif code == 1:
                subpool.add_posting(*args)
            elif code == 2:
                subpool.add_field_length(*args)

    # Reached only when the whole file was replayed without error.
    os.remove(filename)
def from_file(cls, postfile, postingsize, stringids=False):
    """Build a block from the header found at ``postfile``'s current position.

    A new ``cls(postingsize, stringids=stringids)`` is created and then
    populated from the fixed-size header decoded with ``cls._struct``. A
    trailing "maximum ID" value follows the header and is read with
    ``load`` (string IDs) or ``postfile.read_uint()`` (numeric IDs).

    :param cls: class to instantiate; must expose a ``_struct`` with
        ``size`` and ``unpack``.
    :param postfile: file-like object positioned at the start of a block
        header; must support ``tell``, ``read`` and ``read_uint``.
    :param postingsize: passed through unchanged to the ``cls`` constructor.
    :param stringids: when true, the maximum ID is read with ``load``.
    :returns: the populated block, with ``dataoffset`` set to the file
        position immediately after the header and maximum ID.
    """
    block_start = postfile.tell()

    block = cls(postingsize, stringids=stringids)
    block.postfile = postfile

    # Decode the fixed-size header record that opens the block.
    raw_header = postfile.read(cls._struct.size)
    fields = cls._struct.unpack(raw_header)

    # Slot 0 is an offset relative to where the header started.
    block.nextoffset = block_start + fields[0]
    block.cmp = fields[1]
    block.count = fields[2]
    # The ID type code is stored exactly as unpacked (no text decoding).
    block.idcode = fields[3]
    block.idslen = fields[5]
    block.wtslen = fields[6]
    block.maxweight = fields[7]
    # Lengths are stored as single encoded values; expand them on load.
    block.maxlength = byte_to_length(fields[11])
    block.minlength = byte_to_length(fields[12])

    # The "maximum ID" trails the struct; its encoding depends on the ID kind.
    if stringids:
        block.maxid = load(postfile)
    else:
        block.maxid = postfile.read_uint()

    # Everything after this point in the file is block data.
    block.dataoffset = postfile.tell()
    return block
def from_file(cls, postfile, postingsize, stringids=False):
    """Build a block from the header found at ``postfile``'s current position.

    A new ``cls(postingsize, stringids=stringids)`` is created and then
    populated from the fixed-size header decoded with ``cls._struct``. A
    trailing "maximum ID" value follows the header and is read with
    ``load`` (string IDs) or ``postfile.read_uint()`` (numeric IDs).

    :param cls: class to instantiate; must expose a ``_struct`` with
        ``size`` and ``unpack``.
    :param postfile: file-like object positioned at the start of a block
        header; must support ``tell``, ``read`` and ``read_uint``.
    :param postingsize: passed through unchanged to the ``cls`` constructor.
    :param stringids: when true, the maximum ID is read with ``load``.
    :returns: the populated block, with ``dataoffset`` set to the file
        position immediately after the header and maximum ID.
    """
    block_start = postfile.tell()

    block = cls(postingsize, stringids=stringids)
    block.postfile = postfile

    # Decode the fixed-size header record that opens the block.
    raw_header = postfile.read(cls._struct.size)
    fields = cls._struct.unpack(raw_header)

    # Slot 0 is an offset relative to where the header started.
    block.nextoffset = block_start + fields[0]
    block.cmp = fields[1]
    block.count = fields[2]
    # The ID type code is unpacked as bytes and stored as text.
    block.idcode = fields[3].decode("Latin1")
    block.idslen = fields[5]
    block.wtslen = fields[6]
    block.maxweight = fields[7]
    # Lengths are stored as single encoded values; expand them on load.
    block.maxlength = byte_to_length(fields[11])
    block.minlength = byte_to_length(fields[12])

    # The "maximum ID" trails the struct; its encoding depends on the ID kind.
    if stringids:
        block.maxid = load(postfile)
    else:
        block.maxid = postfile.read_uint()

    # Everything after this point in the file is block data.
    block.dataoffset = postfile.tell()
    return block