def _read_ids(self, offset, postcount):
    """Read ``postcount`` ids from the posting file starting at ``offset``.

    When ``self.stringids`` is true, each id is read with the posting
    file's ``read_string`` method and decoded with ``utf8decode``.
    Otherwise the ids are fetched in one call to ``get_array`` using the
    ``"I"`` type code.

    :param offset: position in ``self.postfile`` where the ids begin.
    :param postcount: number of ids to read.
    :returns: a ``(ids, offset)`` tuple, where ``offset`` is the position
        immediately following the ids that were read.
    """
    postfile = self.postfile

    if not self.stringids:
        # Fixed-width ids: the end position can be computed directly.
        id_array = postfile.get_array(offset, "I", postcount)
        return (id_array, offset + _INT_SIZE * postcount)

    # String ids have variable length, so read them one after another and
    # ask the file where we ended up.
    postfile.seek(offset)
    read_string = postfile.read_string
    decoded_ids = []
    for _ in xrange(postcount):
        decoded_ids.append(utf8decode(read_string())[0])
    return (decoded_ids, postfile.tell())
def decode_posting(posting):
    """Decode an encoded posting string.

    The layout read by this function is: a field number of
    ``_USHORT_SIZE`` characters, the UTF-8 encoded term text, a NUL
    terminator, two integers of ``_INT_SIZE`` characters each (document
    number and frequency), and finally the remaining data string.

    :param posting: the encoded posting string.
    :returns: a ``(field_number, text, document_number, frequency,
        datastring)`` tuple.
    :raises ValueError: if no NUL terminator follows the field number, in
        which case the posting cannot be split into text and metadata.
    """
    fieldnum = unpack_ushort(posting[:_USHORT_SIZE])[0]

    # The term text runs from just after the field number up to the first
    # NUL character.
    zero = posting.find(chr(0), _USHORT_SIZE)
    if zero < 0:
        # Without this check the -1 from find() would be used as an index
        # and the function would silently return garbage.
        raise ValueError("Malformed posting: missing NUL terminator after term text")
    text = utf8decode(posting[_USHORT_SIZE:zero])[0]

    # Document number and frequency are stored right after the terminator.
    metastart = zero + 1
    metaend = metastart + _INT_SIZE * 2
    doc, freq = unpack2ints(posting[metastart:metaend])

    # Whatever is left is the opaque per-posting data.
    datastring = posting[metaend:]
    return fieldnum, text, doc, freq, datastring
def _read_block_header(self, offset):
    """Read the header of a posting block located at ``offset``.

    The header is read in this order: the block's max id (a string decoded
    with ``utf8decode`` when ``self.stringids`` is true, otherwise an
    unsigned int), an unsigned int giving the next offset, and a single
    byte giving the number of postings in the block.

    :param offset: position in ``self.postfile`` where the header begins.
    :returns: a ``(maxid, nextoffset, postcount, offset)`` tuple, where
        ``offset`` is the position immediately after the header.
    """
    postfile = self.postfile

    # The max id is either a variable-length string or a fixed-size int.
    if self.stringids:
        postfile.seek(offset)
        raw_id = postfile.read_string()
        maxid = utf8decode(raw_id)[0]
        cursor = postfile.tell()
    else:
        maxid = postfile.get_uint(offset)
        cursor = offset + _INT_SIZE

    nextoffset = postfile.get_uint(cursor)

    # The posting count is stored as one byte right after the next offset.
    count_pos = cursor + _INT_SIZE
    postcount = postfile.get_byte(count_pos)
    assert postcount > 0

    return (maxid, nextoffset, postcount, count_pos + 1)
def decode_termkey(key):
    """Split an encoded term key into its field part and its text.

    The first ``_USHORT_SIZE`` characters of ``key`` are passed to
    ``unpackushort``; the remainder is decoded with ``utf8decode``.

    :param key: the encoded term key.
    :returns: a ``(field, text)`` pair, where ``field`` is whatever
        ``unpackushort`` returns for the prefix and ``text`` is the first
        element of the ``utf8decode`` result.
    """
    # NOTE(review): decode_posting in this file calls
    # ``unpack_ushort(...)[0]`` whereas this function calls
    # ``unpackushort(...)`` without indexing. If ``unpackushort`` is a
    # struct-style unpack, the field part returned here is a 1-tuple rather
    # than an int -- confirm against the helper's definition.
    field_part = key[:_USHORT_SIZE]
    text_part = key[_USHORT_SIZE:]
    fieldnum = unpackushort(field_part)
    text = utf8decode(text_part)[0]
    return fieldnum, text