def test_varbytes_offsets():
    """Check VarBytesColumn reads with and without stored offsets.

    The same 5000 documents are indexed twice into a sortable ``name``
    field: once with ``allow_offsets=False`` and once with
    ``allow_offsets=True, write_offsets_cutoff=4096``. Each run verifies the
    reader type, the ``had_stored_offsets`` flag of the raw column, and a
    handful of sampled values.
    """
    words = u("alfa bravo charlie delta echo foxtrot golf hotel").split()
    nwords = len(words)
    sample_docnums = (10, 100, 1000, 3000)

    def check(column, expect_offsets):
        # Index the cycling word list, then read values back via the column
        schema = fields.Schema(name=fields.ID(sortable=column))
        with TempIndex(schema) as ix:
            with ix.writer() as w:
                for docnum in xrange(5000):
                    w.add_document(name=words[docnum % nwords])

            with ix.reader() as r:
                creader = r.column_reader("name")
                assert isinstance(creader, columns.TranslatingColumnReader)
                if expect_offsets:
                    assert creader.raw_column().had_stored_offsets
                else:
                    assert not creader.raw_column().had_stored_offsets
                for docnum in sample_docnums:
                    assert creader[docnum] == words[docnum % nwords]

    # Without offsets
    check(columns.VarBytesColumn(allow_offsets=False), False)

    # With offsets
    # NOTE(review): the cutoff of 4096 is presumably chosen so that 5000
    # documents exceed it -- confirm against VarBytesColumn.
    check(columns.VarBytesColumn(allow_offsets=True,
                                 write_offsets_cutoff=4096), True)
Ejemplo n.º 2
0
def test_roundtrip():
    """Run the ``_rt`` helper over one sample data set per column type.

    Each call passes a column object, a list of sample values and a third
    value (presumably the column's default value -- confirm against ``_rt``,
    which is defined elsewhere in the test module).
    """
    # Bytes-based columns
    _rt(columns.VarBytesColumn(),
        [b("a"), b("ccc"), b("bbb"),
         b("e"), b("dd")], b(""))
    _rt(columns.FixedBytesColumn(5), [
        b("aaaaa"), b("eeeee"),
        b("ccccc"), b("bbbbb"),
        b("eeeee")
    ],
        b("\x00") * 5)
    _rt(columns.RefBytesColumn(),
        [b("a"), b("ccc"),
         b("bb"), b("ccc"),
         b("a"), b("bb")], b(""))
    _rt(columns.RefBytesColumn(3),
        [b("aaa"), b("bbb"),
         b("ccc"), b("aaa"),
         b("bbb"), b("ccc")],
        b("\x00") * 3)
    _rt(columns.StructColumn("ifH", (0, 0.0, 0)), [(100, 1.5, 15000),
                                                   (-100, -5.0, 0),
                                                   (5820, 6.5, 462),
                                                   (-57829, -1.5, 6),
                                                   (0, 0, 0)], (0, 0.0, 0))

    # Numeric columns, one per type code
    numcol = columns.NumericColumn
    _rt(numcol("b"), [10, -20, 30, -25, 15], 0)
    _rt(numcol("B"), [10, 20, 30, 25, 15], 0)
    _rt(numcol("h"), [1000, -2000, 3000, -15000, 32000], 0)
    _rt(numcol("H"), [1000, 2000, 3000, 15000, 50000], 0)
    _rt(numcol("i"), [2**16, -(2**20), 2**24, -(2**28), 2**30], 0)
    _rt(numcol("I"), [2**16, 2**20, 2**24, 2**28, 2**31 & 0xFFFFFFFF], 0)
    _rt(numcol("q"), [10, -20, 30, -25, 15], 0)
    _rt(numcol("Q"), [2**35, 2**40, 2**48, 2**52, 2**63], 0)
    _rt(numcol("f"), [1.5, -2.5, 3.5, -4.5, 1.25], 0)
    _rt(numcol("d"), [1.5, -2.5, 3.5, -4.5, 1.25], 0)

    # Bit column with random booleans; 70 and 90 values are both above the
    # compress_at=10 setting passed to the column.
    c = columns.BitColumn(compress_at=10)
    _rt(c, [bool(random.randint(0, 1)) for _ in xrange(70)], False)
    _rt(c, [bool(random.randint(0, 1)) for _ in xrange(90)], False)

    # Pickled arbitrary objects stored on top of a VarBytesColumn
    c = columns.PickleColumn(columns.VarBytesColumn())
    _rt(c, [None, True, False, 100, -7, "hello"], None)

    # List-valued columns. A redundant second VarBytesListColumn() that was
    # created here and immediately overwritten has been dropped.
    c = columns.VarBytesListColumn()
    _rt(c, [[b('garnet'), b('amethyst')], [b('pearl')]], [])

    c = columns.FixedBytesListColumn(4)
    _rt(c, [[b('garn'), b('amet')], [b('pear')]], [])
Ejemplo n.º 3
0
def test_pickleability():
    """Instantiate every concrete column class and pickle round-trip it.

    All classes found in the ``columns`` module that subclass
    ``columns.Column`` (except the listed base classes) are constructed and
    passed through ``loads(dumps(obj, -1))``. A ``TypeError`` raised by a
    constructor is re-raised with the offending class named in the message.
    """
    # Base classes that are not tested themselves
    base_classes = (columns.Column, columns.WrappedColumn, columns.ListColumn)
    # Positional constructor arguments for classes that require them,
    # keyed by class name
    ctor_args = {
        "ClampedNumericColumn": (columns.NumericColumn("B"), ),
        "FixedBytesColumn": (5, ),
        "FixedBytesListColumn": (5, ),
        "NumericColumn": ("i", ),
        "PickleColumn": (columns.VarBytesColumn(), ),
        "StructColumn": ("=if", (0, 0.0)),
    }

    column_classes = []
    for _, cls in inspect.getmembers(columns, inspect.isclass):
        if issubclass(cls, columns.Column) and cls not in base_classes:
            column_classes.append(cls)

    for coltype in column_classes:
        args = ctor_args.get(coltype.__name__, ())
        try:
            instance = coltype(*args)
        except TypeError:
            # sys.exc_info() instead of "except ... as" keeps the syntax
            # valid on very old Python versions
            e = sys.exc_info()[1]
            raise TypeError("Error instantiating %r: %s" % (coltype, e))
        else:
            # -1 selects the highest pickle protocol available
            loads(dumps(instance, -1))
Ejemplo n.º 4
0
    def __init__(self, analyzer=None, phrase=True, chars=False, vector=None,
                 stored=False, field_boost=1.0, multitoken_query="default",
                 spelling=False, sortable=False, lang=None):
        """
        :param analyzer: The analysis.Analyzer to use to index the field
            contents. See the analysis module for more information. If you
            omit this argument (and ``lang``), the field uses
            analysis.StandardAnalyzer.
        :param phrase: Whether to store positional information to allow
            phrase searching.
        :param chars: Whether to store character ranges along with positions.
            If this is True, "phrase" is also implied.
        :param vector: A :class:`whoosh.formats.Format` object to use to store
            term vectors, a class to instantiate (with no arguments) as the
            vector format, or ``True`` to store vectors using the same format
            as the inverted index, or ``None`` or ``False`` to not store
            vectors. By default, fields do not store term vectors.
        :param stored: Whether to store the value of this field with the
            document. Since this field type generally contains a lot of text,
            you should avoid storing it with the document unless you need to,
            for example to allow fast excerpts in the search results.
        :param field_boost: Passed as ``field_boost`` to the posting format
            created for this field.
        :param multitoken_query: Stored unchanged on the field as
            ``multitoken_query``. NOTE(review): how this value is interpreted
            is not visible from this method -- see the query parser.
        :param spelling: Whether to generate word graphs for this field to make
            spelling suggestions much faster.
        :param sortable: If True, make this field sortable using the default
            column type. If you pass a :class:`whoosh.columns.Column` instance
            instead of True, the field will use the given column type.
        :param lang: automatically configure a
            :class:`whoosh.analysis.LanguageAnalyzer` for the given language.
            This is ignored if you also specify an ``analyzer``.
        """

        # An explicit analyzer wins over ``lang``; with neither, fall back
        # to the standard analyzer.
        if not analyzer:
            if lang:
                analyzer = analysis.LanguageAnalyzer(lang)
            else:
                analyzer = analysis.StandardAnalyzer()
        self.analyzer = analyzer

        # Pick the posting format: chars takes precedence over phrase.
        formatclass = (formats.Characters if chars
                       else formats.Positions if phrase
                       else formats.Frequency)
        self.format = formatclass(field_boost=field_boost)

        # Normalize ``vector`` to a Format instance or None.
        if not vector:
            vector = None
        elif type(vector) is type:
            # A class was given: instantiate it
            vector = vector()
        elif not isinstance(vector, formats.Format):
            # Any other true value: reuse the posting format class
            vector = formatclass()
        self.vector = vector

        # Normalize ``sortable`` to a Column instance or None.
        if not sortable:
            self.column_type = None
        elif isinstance(sortable, columns.Column):
            self.column_type = sortable
        else:
            self.column_type = columns.VarBytesColumn()

        self.stored = stored
        self.spelling = spelling
        self.scorable = True
        self.multitoken_query = multitoken_query
Ejemplo n.º 5
0
 def __init__(self, columnobj=None):
     """
     :param columnobj: the :class:`columns.Column` instance to store as
         ``column_type``. If omitted, a ``columns.VarBytesColumn`` is used.
     :raises TypeError: if the object is not a ``columns.Column`` instance.
     """
     column = columns.VarBytesColumn() if columnobj is None else columnobj
     if isinstance(column, columns.Column):
         self.column_type = column
     else:
         raise TypeError("%r is not a column object" % (column,))
Ejemplo n.º 6
0
 def default_column(self):
     """Return a new ``columns.VarBytesColumn`` instance."""
     column = columns.VarBytesColumn()
     return column