def add_row(self, key, data):
    """Buffer one (key, data) row for the current block.

    Call this AFTER adding any postings for the row. Keys must arrive in
    strictly increasing order; a key <= the previously added key raises
    IndexError. When the buffered block reaches ``self.blocksize`` bytes
    (estimated), the block is flushed via ``_write_block``.
    """
    if key <= self.lastkey:
        raise IndexError("Keys must increase: %r..%r" % (self.lastkey, key))

    # Estimate how many bytes this row's data adds to the current block.
    if isinstance(data, array):
        self.blockfilled += len(data) * data.itemsize
    else:
        # Ugh! We're pickling twice! At least it's fast.
        self.blockfilled += len(dump_pickle_str(data, -1))
    self.lastkey = key

    if self.haspostings:
        # Fold the accumulated posting span (offset, length, count) into
        # the stored row data, then reset the per-row posting state.
        end = self.posting_file.tell()
        span = end - self.offset
        self.rowbuffer.append((key, (self.offset, span, self.postcount, data)))
        self.offset = end
        self.postcount = 0
        self.lastpostid = None
    else:
        self.rowbuffer.append((key, data))

    # Flush once the (estimated) block size is reached.
    if self.blockfilled >= self.blocksize:
        self._write_block()
def _write_block(self): buf = self.rowbuffer key = buf[0][0] compressed = self.compressed self.dir.append((key, self.table_file.tell())) if compressed: pck = dump_pickle_str(buf, -1) self.table_file.write_string(compress(pck, compressed)) else: self.table_file.write_pickle(buf) self.rowbuffer = [] self.blockfilled = 0