class Writer(ColumnWriter):
    """Column writer for variable-length byte strings.

    Each value is written to the file as soon as it is added. The length
    of every document's value is collected in memory and written by
    ``finish()``, followed by one byte holding the typecode of the
    lengths array.
    """

    def __init__(self, dbfile):
        """
        :param dbfile: the ``StructFile`` the column data is written to.
        """
        assert isinstance(dbfile, StructFile)
        self._dbfile = dbfile
        # Number of document slots accounted for so far, i.e. the next
        # document number that still needs an entry
        self._count = 0
        # One entry per document: byte length of its value (0 = no value)
        self._lengths = GrowableArray(allow_longs=False)

    def __repr__(self):
        return "<VarBytes.Writer>"

    def fill(self, docnum):
        """Record a zero length for every slot below ``docnum`` that has
        not been accounted for yet.

        :param docnum: the document number to pad up to (exclusive).
        """
        missing = docnum - self._count
        if missing > 0:
            self._lengths.extend(0 for _ in xrange(missing))
            # Remember the padding, so that calling fill() (or finish())
            # again does not pad the same slots a second time
            self._count = docnum

    def add(self, docnum, v):
        """Write the value for document ``docnum``.

        :param docnum: the document number the value belongs to.
        :param v: the value to write; it is passed to the file's
            ``write()`` and its ``len()`` is recorded.
        """
        self.fill(docnum)
        self._dbfile.write(v)
        self._lengths.append(len(v))
        self._count = docnum + 1

    def finish(self, doccount):
        """Pad the column to ``doccount`` documents and write the lengths.

        :param doccount: the total number of documents in the column.
        """
        self.fill(doccount)
        lengths = self._lengths.array

        self._dbfile.write_array(lengths)
        # Write the typecode for the lengths
        self._dbfile.write_byte(ord(lengths.typecode))
Example #2
0
    class Writer(ColumnWriter):
        """Writes a column of variable-length byte strings.

        Values go straight to the file in ``add()``. Their lengths are
        kept in memory, one per document, and are written by ``finish()``
        together with a trailing byte that holds the typecode of the
        lengths array.
        """

        def __init__(self, dbfile):
            """
            :param dbfile: the ``StructFile`` to write the column to.
            """
            assert isinstance(dbfile, StructFile)
            self._dbfile = dbfile
            # How many document slots have an entry in _lengths so far
            self._count = 0
            # Byte length of each document's value; 0 for skipped documents
            self._lengths = GrowableArray(allow_longs=False)

        def __repr__(self):
            return "<VarBytes.Writer>"

        def fill(self, docnum):
            """Add zero-length entries for the slots between the current
            count and ``docnum``.

            :param docnum: the document number to pad up to (exclusive).
            """
            if docnum <= self._count:
                return
            gap = docnum - self._count
            self._lengths.extend(0 for _ in xrange(gap))
            # Advance the count so the same gap is never padded twice if
            # fill() or finish() is called again
            self._count = docnum

        def add(self, docnum, v):
            """Write ``v`` as the value of document ``docnum``.

            :param docnum: the document number the value belongs to.
            :param v: the value; written with the file's ``write()`` and
                measured with ``len()``.
            """
            self.fill(docnum)
            self._dbfile.write(v)
            self._lengths.append(len(v))
            self._count = docnum + 1

        def finish(self, doccount):
            """Pad to ``doccount`` documents, then write the lengths array
            and its typecode.

            :param doccount: the total number of documents in the column.
            """
            self.fill(doccount)
            lengths = self._lengths.array

            self._dbfile.write_array(lengths)
            # Write the typecode for the lengths
            self._dbfile.write_byte(ord(lengths.typecode))
Example #3
0
    class Writer(ColumnWriter):
        """Column writer for variable-length byte strings, addressed by
        ``(docnum, docbase)`` pairs.

        A document's slot in the column is ``docnum - docbase``. Values
        are written to the file as they are added. ``finish()`` then
        writes the per-document lengths, optionally the per-document
        start offsets, and a short trailer identifying the array
        typecodes.
        """

        def __init__(self, dbfile, allow_offsets=True, cutoff=2**15):
            """
            :param dbfile: the ``StructFile`` the column is written to.
            :param allow_offsets: if false, the offsets array is never
                written.
            :param cutoff: the offsets array is only written when the
                column has more than this many slots.
            """
            assert isinstance(dbfile, StructFile)
            self._dbfile = dbfile
            # Number of slots accounted for so far
            self._count = 0
            # Per-slot value length (0 = no value)
            self._lengths = GrowableArray(allow_longs=False)
            # Per-slot start position of the value within the column data
            self._offsets = GrowableArray(allow_longs=False)
            # Total number of value bytes written so far
            self._offset_base = 0
            self.allow_offsets = allow_offsets
            self.cutoff = cutoff

        def __repr__(self):
            return "<VarBytes.Writer>"

        def fill(self, docinfo):
            """Add empty entries for every slot below the given document
            that has not been accounted for yet.

            :param docinfo: a ``(docnum, docbase)`` pair; slots are padded
                up to ``docnum - docbase`` (exclusive).
            """
            docnum, docbase = docinfo
            slot = docnum - docbase
            missing = slot - self._count
            if missing > 0:
                base = self._offset_base
                self._lengths.extend(0 for _ in xrange(missing))
                # Empty slots point at the current end of the data
                self._offsets.extend(base for _ in xrange(missing))
                # Remember the padding so a repeated fill()/finish() does
                # not pad the same slots twice
                self._count = slot

        def add(self, docinfo, v):
            """Write the value for a document.

            :param docinfo: a ``(docnum, docbase)`` pair identifying the
                document.
            :param v: the value; written with the file's ``write()`` and
                measured with ``len()``.
            """
            docnum, docbase = docinfo
            self.fill(docinfo)
            self._dbfile.write(v)
            self._lengths.append(len(v))
            self._offsets.append(self._offset_base)
            self._offset_base += len(v)
            self._count = (docnum - docbase) + 1

        def finish(self, docinfo):
            """Pad the column and write the lengths, optional offsets and
            the trailer.

            :param docinfo: a ``(docnum, docbase)`` pair; the column ends
                up with ``docnum - docbase`` slots.
            """
            docnum, docbase = docinfo
            # Pad BEFORE reading the backing arrays. Padding appends the
            # current offset base, which can be larger than any offset
            # stored so far. NOTE(review): if GrowableArray replaces its
            # ``array`` attribute when a value does not fit the current
            # typecode, a reference taken before fill() would be stale and
            # the wrong array/typecode would be written -- confirm.
            self.fill(docinfo)

            dbfile = self._dbfile
            lengths = self._lengths.array
            offsets = self._offsets.array

            dbfile.write_array(lengths)

            # Only write the offsets if there is a large number of items in the
            # column, otherwise it's fast enough to derive them from the lens
            write_offsets = (self.allow_offsets
                             and docnum - docbase > self.cutoff)
            if write_offsets:
                dbfile.write_array(offsets)

            # Backwards compatibility: previous versions only wrote the lengths,
            # and the last byte of the column was the lengths type code...
            dbfile.write(lengths.typecode.encode("ascii"))
            # ...but if we wrote offsets, make the last byte "X" so we know
            if write_offsets:
                dbfile.write(offsets.typecode.encode("ascii"))
                dbfile.write("X".encode("ascii"))
Example #4
0
    class Writer(ColumnWriter):
        """Column writer for variable-length byte strings with optional
        stored offsets.

        Values are written to the file as they are added. ``finish()``
        writes the per-document lengths, then (for large columns) the
        per-document start offsets, then a trailer holding the array
        typecodes.
        """

        def __init__(self, dbfile, allow_offsets=True, cutoff=2**15):
            """
            :param dbfile: the ``StructFile`` the column is written to.
            :param allow_offsets: if false, the offsets array is never
                written.
            :param cutoff: the offsets array is only written when the
                document count is greater than this.
            """
            assert isinstance(dbfile, StructFile)
            self._dbfile = dbfile
            # Number of documents accounted for so far
            self._count = 0
            # Per-document value length (0 = no value)
            self._lengths = GrowableArray(allow_longs=False)
            # Per-document start position of the value in the column data
            self._offsets = GrowableArray(allow_longs=False)
            # Total number of value bytes written so far
            self._offset_base = 0
            self.allow_offsets = allow_offsets
            self.cutoff = cutoff

        def __repr__(self):
            return "<VarBytes.Writer>"

        def fill(self, docnum):
            """Add empty entries for every document below ``docnum`` that
            has not been accounted for yet.

            :param docnum: the document number to pad up to (exclusive).
            """
            missing = docnum - self._count
            if missing > 0:
                base = self._offset_base
                self._lengths.extend(0 for _ in xrange(missing))
                # Empty documents point at the current end of the data
                self._offsets.extend(base for _ in xrange(missing))
                # Remember the padding so a repeated fill()/finish() does
                # not pad the same documents twice
                self._count = docnum

        def add(self, docnum, v):
            """Write the value for document ``docnum``.

            :param docnum: the document number the value belongs to.
            :param v: the value; written with the file's ``write()`` and
                measured with ``len()``.
            """
            self.fill(docnum)
            self._dbfile.write(v)
            self._lengths.append(len(v))
            self._offsets.append(self._offset_base)
            self._offset_base += len(v)
            self._count = docnum + 1

        def finish(self, doccount):
            """Pad the column to ``doccount`` documents and write the
            lengths, optional offsets and the trailer.

            :param doccount: the total number of documents in the column.
            """
            # Pad BEFORE reading the backing arrays. Padding appends the
            # current offset base, which can be larger than any offset
            # stored so far. NOTE(review): if GrowableArray replaces its
            # ``array`` attribute when a value does not fit the current
            # typecode, a reference taken before fill() would be stale and
            # the wrong array/typecode would be written -- confirm.
            self.fill(doccount)

            dbfile = self._dbfile
            lengths = self._lengths.array
            offsets = self._offsets.array

            dbfile.write_array(lengths)

            # Only write the offsets if there is a large number of items in the
            # column, otherwise it's fast enough to derive them from the lens
            write_offsets = self.allow_offsets and doccount > self.cutoff
            if write_offsets:
                dbfile.write_array(offsets)

            # Backwards compatibility: previous versions only wrote the lengths,
            # and the last byte of the column was the lengths type code...
            dbfile.write(lengths.typecode.encode("ascii"))
            # ...but if we wrote offsets, make the last byte "X" so we know
            if write_offsets:
                dbfile.write(offsets.typecode.encode("ascii"))
                dbfile.write("X".encode("ascii"))