def test_roundtrip():
    """Run the ``_rt`` helper over sample data for each column type.

    Every call passes a column object, a list of sample values, and a third
    argument that mirrors the column's default value.

    NOTE(review): ``_rt`` is defined elsewhere in this file; presumably it
    writes the values, reads them back and compares them -- confirm against
    the helper before relying on this description.
    """
    # Byte-string columns; the Ref variants are fed repeated values.
    _rt(columns.VarBytesColumn(),
        [b("a"), b("ccc"), b("bbb"), b("e"), b("dd")], b(""))
    _rt(columns.FixedBytesColumn(5),
        [b("aaaaa"), b("eeeee"), b("ccccc"), b("bbbbb"), b("eeeee")],
        b("\x00") * 5)
    _rt(columns.RefBytesColumn(),
        [b("a"), b("ccc"), b("bb"), b("ccc"), b("a"), b("bb")], b(""))
    _rt(columns.RefBytesColumn(3),
        [b("aaa"), b("bbb"), b("ccc"), b("aaa"), b("bbb"), b("ccc")],
        b("\x00") * 3)

    # Struct column holding (int, float, unsigned short) tuples.
    _rt(columns.StructColumn("ifH", (0, 0.0, 0)),
        [(100, 1.5, 15000), (-100, -5.0, 0), (5820, 6.5, 462),
         (-57829, -1.5, 6), (0, 0, 0)],
        (0, 0.0, 0))

    # Numeric columns, one case per type code, all checked against 0.
    # (The type codes appear to be struct-module format characters.)
    numeric_cases = [
        ("b", [10, -20, 30, -25, 15]),
        ("B", [10, 20, 30, 25, 15]),
        ("h", [1000, -2000, 3000, -15000, 32000]),
        ("H", [1000, 2000, 3000, 15000, 50000]),
        ("i", [2**16, -(2**20), 2**24, -(2**28), 2**30]),
        ("I", [2**16, 2**20, 2**24, 2**28, 2**31 & 0xFFFFFFFF]),
        ("q", [10, -20, 30, -25, 15]),
        ("Q", [2**35, 2**40, 2**48, 2**52, 2**63]),
        ("f", [1.5, -2.5, 3.5, -4.5, 1.25]),
        ("d", [1.5, -2.5, 3.5, -4.5, 1.25]),
    ]
    for typecode, values in numeric_cases:
        _rt(columns.NumericColumn(typecode), values, 0)

    # Bit column: the same column object is run twice with random booleans,
    # once with 70 values and once with 90 (compress_at is set to 10).
    c = columns.BitColumn(compress_at=10)
    _rt(c, [bool(random.randint(0, 1)) for _ in xrange(70)], False)
    _rt(c, [bool(random.randint(0, 1)) for _ in xrange(90)], False)

    # Pickle column wrapping a VarBytesColumn, fed mixed Python objects.
    c = columns.PickleColumn(columns.VarBytesColumn())
    _rt(c, [None, True, False, 100, -7, "hello"], None)

    # List-of-bytes columns, variable- and fixed-width items.
    c = columns.VarBytesListColumn()
    _rt(c, [[b('garnet'), b('amethyst')], [b('pearl')]], [])

    c = columns.FixedBytesListColumn(4)
    _rt(c, [[b('garn'), b('amet')], [b('pear')]], [])
import zlib except ImportError: zlib = None # This byte sequence is written at the start of a posting list to identify the # codec/version WHOOSH3_HEADER_MAGIC = b("W3Bl") # Column type to store field length info LENGTHS_COLUMN = columns.NumericColumn("B", default=0) # Column type to store pointers to vector posting lists VECTOR_COLUMN = columns.NumericColumn("I") # Column type to store vector posting list lengths VECTOR_LEN_COLUMN = columns.NumericColumn("i") # Column type to store values of stored fields STORED_COLUMN = columns.PickleColumn(columns.CompressedBytesColumn()) class W3Codec(base.CodecWithGraph): # File extensions TERMS_EXT = ".trm" # Term index POSTS_EXT = ".pst" # Term postings VPOSTS_EXT = ".vps" # Vector postings COLUMN_EXT = ".col" # Per-document value columns def __init__(self, blocklimit=128, compression=3, inlinelimit=1): self._blocklimit = blocklimit self._compression = compression self._inlinelimit = inlinelimit # Per-document value writer