Beispiel #1
0
def test_roundtrip():
    _rt(columns.VarBytesColumn(),
        [b("a"), b("ccc"), b("bbb"),
         b("e"), b("dd")], b(""))
    _rt(columns.FixedBytesColumn(5), [
        b("aaaaa"), b("eeeee"),
        b("ccccc"), b("bbbbb"),
        b("eeeee")
    ],
        b("\x00") * 5)
    _rt(columns.RefBytesColumn(),
        [b("a"), b("ccc"),
         b("bb"), b("ccc"),
         b("a"), b("bb")], b(""))
    _rt(columns.RefBytesColumn(3),
        [b("aaa"), b("bbb"),
         b("ccc"), b("aaa"),
         b("bbb"), b("ccc")],
        b("\x00") * 3)
    _rt(columns.StructColumn("ifH", (0, 0.0, 0)), [(100, 1.5, 15000),
                                                   (-100, -5.0, 0),
                                                   (5820, 6.5, 462),
                                                   (-57829, -1.5, 6),
                                                   (0, 0, 0)], (0, 0.0, 0))

    numcol = columns.NumericColumn
    _rt(numcol("b"), [10, -20, 30, -25, 15], 0)
    _rt(numcol("B"), [10, 20, 30, 25, 15], 0)
    _rt(numcol("h"), [1000, -2000, 3000, -15000, 32000], 0)
    _rt(numcol("H"), [1000, 2000, 3000, 15000, 50000], 0)
    _rt(numcol("i"), [2**16, -(2**20), 2**24, -(2**28), 2**30], 0)
    _rt(numcol("I"), [2**16, 2**20, 2**24, 2**28, 2**31 & 0xFFFFFFFF], 0)
    _rt(numcol("q"), [10, -20, 30, -25, 15], 0)
    _rt(numcol("Q"), [2**35, 2**40, 2**48, 2**52, 2**63], 0)
    _rt(numcol("f"), [1.5, -2.5, 3.5, -4.5, 1.25], 0)
    _rt(numcol("d"), [1.5, -2.5, 3.5, -4.5, 1.25], 0)

    c = columns.BitColumn(compress_at=10)
    _rt(c, [bool(random.randint(0, 1)) for _ in xrange(70)], False)
    _rt(c, [bool(random.randint(0, 1)) for _ in xrange(90)], False)

    c = columns.PickleColumn(columns.VarBytesColumn())
    _rt(c, [None, True, False, 100, -7, "hello"], None)

    c = columns.VarBytesListColumn()
    _rt(c, [[b('garnet'), b('amethyst')], [b('pearl')]], [])
    c = columns.VarBytesListColumn()

    c = columns.FixedBytesListColumn(4)
    _rt(c, [[b('garn'), b('amet')], [b('pear')]], [])
Beispiel #2
0
    import zlib
except ImportError:
    zlib = None

# This byte sequence is written at the start of a posting list to identify the
# codec/version
WHOOSH3_HEADER_MAGIC = b("W3Bl")

# Column type to store field length info
LENGTHS_COLUMN = columns.NumericColumn("B", default=0)
# Column type to store pointers to vector posting lists
VECTOR_COLUMN = columns.NumericColumn("I")
# Column type to store vector posting list lengths
VECTOR_LEN_COLUMN = columns.NumericColumn("i")
# Column type to store values of stored fields
STORED_COLUMN = columns.PickleColumn(columns.CompressedBytesColumn())


class W3Codec(base.CodecWithGraph):
    # File extensions
    TERMS_EXT = ".trm"  # Term index
    POSTS_EXT = ".pst"  # Term postings
    VPOSTS_EXT = ".vps"  # Vector postings
    COLUMN_EXT = ".col"  # Per-document value columns

    def __init__(self, blocklimit=128, compression=3, inlinelimit=1):
        self._blocklimit = blocklimit
        self._compression = compression
        self._inlinelimit = inlinelimit

    # Per-document value writer