def _write_block(self):
    """Write the currently buffered block of postings to the posting file.

    Data is written in this order:

    1. The last ID in the block (as a string when ``self.stringids`` is
       true, otherwise as a uint).
    2. A uint slot, first written as 0 and later overwritten with the file
       offset just past the end of this block.
    3. One byte holding the number of postings in the block.
    4. The IDs (one string each, or a single array).
    5. If ``posting_size < 0``: an ``array("I")`` of the value lengths.
    6. If ``posting_size != 0``: all values joined together.

    Afterwards ``self.posttotal`` grows by the number of postings written,
    ``self._reset_block()`` is called, and ``self.blockcount`` is
    incremented.
    """
    value_size = self.format.posting_size
    use_string_ids = self.stringids
    postfile = self.postfile
    block_ids = self.blockids
    block_values = self.blockvalues
    count = len(block_ids)

    # Block header: the highest (last) ID in the block comes first.
    last_id = block_ids[-1]
    if use_string_ids:
        postfile.write_string(utf8encode(last_id)[0])
    else:
        postfile.write_uint(last_id)

    # Reserve room for the pointer to the next block; the real offset is
    # only known once the whole block has been written.
    pointer_pos = postfile.tell()
    postfile.write_uint(0)

    # Number of postings in this block.
    # NOTE(review): written as a single byte -- presumably the caller caps
    # the block size so the count fits; confirm.
    postfile.write_byte(count)

    # The IDs themselves.
    if use_string_ids:
        for block_id in block_ids:
            postfile.write_string(utf8encode(block_id)[0])
    else:
        postfile.write_array(block_ids)

    # A negative posting size means the per-value lengths must be stored.
    if value_size < 0:
        postfile.write_array(array("I", [len(v) for v in block_values]))

    # A posting size of zero means there are no values to store.
    if value_size != 0:
        postfile.write("".join(block_values))

    # Go back and fill in the pointer to the next block, then return to the
    # end of the block so later writes continue from there.
    postfile.flush()
    block_end = postfile.tell()
    postfile.seek(pointer_pos)
    postfile.write_uint(block_end)
    postfile.seek(block_end)

    self.posttotal += count
    self._reset_block()
    self.blockcount += 1
def encode_posting(fieldnum, text, doc, freq, datastring):
    """Return one posting encoded as a single string, for sorting.

    The result is the concatenation, in this order, of:

    * ``pack_ushort(fieldnum)``
    * the first element of ``utf8encode(text)``
    * a NUL character (``chr(0)``)
    * ``pack2ints(doc, freq)``
    * ``datastring``, unchanged
    """
    packed_field = pack_ushort(fieldnum)
    encoded_text = utf8encode(text)[0]
    separator = chr(0)
    packed_doc_freq = pack2ints(doc, freq)
    return "".join(
        (packed_field, encoded_text, separator, packed_doc_freq, datastring)
    )
# NOTE(review): another definition of encode_posting with the same signature
# precedes this one; if both live in the same module, this later one is the
# binding that remains -- confirm the duplicate is intended.
def encode_posting(fieldnum, text, doc, freq, datastring):
    """Encode a posting as one string so postings can be sorted.

    Pieces are joined in a fixed order: the packed field number, the encoded
    term text, a NUL character, the packed ``(doc, freq)`` pair, and finally
    ``datastring`` as given.
    """
    pieces = [pack_ushort(fieldnum)]
    pieces.append(utf8encode(text)[0])
    pieces.append(chr(0))
    pieces.append(pack2ints(doc, freq))
    pieces.append(datastring)
    return "".join(pieces)
def encode_termkey(term):
    """Encode a ``(fieldnum, text)`` term as a single key string.

    ``term`` must unpack into exactly two items. The key is
    ``packushort(fieldnum)`` followed by the first element of
    ``utf8encode(text)``.
    """
    # NOTE(review): encode_posting calls ``pack_ushort`` while this function
    # calls ``packushort`` -- confirm both names are defined where this runs.
    fieldnum, text = term
    packed_field = packushort(fieldnum)
    encoded_text = utf8encode(text)[0]
    return packed_field + encoded_text