def test_column_query():
    """Check ColumnQuery with both literal values and predicate callables.

    Six documents are written with a sortable ID column ``a`` and a sortable
    numeric column ``b``.  Each query is resolved to the stored ``id`` of
    every document it yields, in the order the query produces them.
    """
    schema = fields.Schema(
        id=fields.STORED,
        a=fields.ID(sortable=True),
        b=fields.NUMERIC(sortable=True)
    )
    words = ["alfa", "bravo", "charlie", "delta", "echo", "foxtrot"]

    with TempIndex(schema, "columnquery") as ix:
        with ix.writer(codec=W3Codec()) as w:
            # Document ids run 1..6 and the numeric column is always id * 10.
            for doc_id, word in enumerate(words, 1):
                w.add_document(id=doc_id, a=u(word), b=doc_id * 10)

        with ix.searcher() as s:
            def matching_ids(q):
                # Translate the query's document numbers into stored ids.
                found = []
                for docnum in q.docs(s):
                    found.append(s.stored_fields(docnum)["id"])
                return found

            def not_delta(v):
                return v != u("delta")

            def over_thirty(v):
                return v > 30

            # Exact-value conditions.
            assert matching_ids(query.ColumnQuery("a", u("bravo"))) == [2]
            assert matching_ids(query.ColumnQuery("b", 30)) == [3]

            # Callable conditions are applied to each column value.
            assert (matching_ids(query.ColumnQuery("a", not_delta))
                    == [1, 2, 3, 5, 6])
            assert matching_ids(query.ColumnQuery("b", over_thirty)) == [4, 5, 6]
def test_hit_column():
    """Check item access on a search hit for an unstored field.

    Without a column, ``hit["text"]`` must raise ``KeyError``; when the field
    is sortable (and so has a column), the same access must return the
    original text.
    """
    # Case 1: the field is neither stored nor sortable.
    plain_schema = fields.Schema(text=fields.TEXT())
    plain_ix = RamStorage().create_index(plain_schema)
    with plain_ix.writer() as w:
        w.add_document(text=u("alfa bravo charlie"))

    with plain_ix.searcher() as s:
        results = s.search(query.Term("text", "alfa"))
        assert len(results) == 1
        first = results[0]
        with pytest.raises(KeyError):
            _ = first["text"]

    # Case 2: the field is sortable, so a column holds its value.
    column_schema = fields.Schema(text=fields.TEXT(sortable=True))
    column_ix = RamStorage().create_index(column_schema)
    with column_ix.writer(codec=W3Codec()) as w:
        w.add_document(text=u("alfa bravo charlie"))

    with column_ix.searcher() as s:
        results = s.search(query.Term("text", "alfa"))
        assert len(results) == 1
        first = results[0]
        assert first["text"] == u("alfa bravo charlie")
def test_multivalue():
    """Check what the column readers yield for list-valued fields.

    Each document supplies a list for both the text field ``s`` and the
    numeric field ``n``; the assertions expect each column to expose only
    the first element of each list.
    """
    schema = fields.Schema(
        s=fields.TEXT(sortable=True),
        n=fields.NUMERIC(sortable=True)
    )
    ix = RamStorage().create_index(schema)

    documents = [
        ("alfa foxtrot charlie", [100, 200, 300]),
        ("juliet bravo india", [10, 20, 30]),
    ]
    with ix.writer(codec=W3Codec()) as w:
        for phrase, numbers in documents:
            w.add_document(s=u(phrase).split(), n=numbers)

    with ix.reader() as r:
        text_values = list(r.column_reader("s"))
        assert text_values == ["alfa", "juliet"]

        numeric_values = list(r.column_reader("n"))
        assert numeric_values == [100, 10]
def test_column_field():
    """Check columns for a sortable TEXT field and an explicit COLUMN field.

    Field ``a`` is sortable text and field ``b`` is a ``COLUMN`` backed by
    ``RefBytesColumn``.  Both must report a column, and the first value read
    back must have the expected content and exact type (text for ``a``,
    bytes for ``b``).
    """
    schema = fields.Schema(
        a=fields.TEXT(sortable=True),
        b=fields.COLUMN(columns.RefBytesColumn())
    )
    rows = [
        ("alfa bravo", "charlie delta"),
        ("bravo charlie", "delta echo"),
        ("charlie delta", "echo foxtrot"),
    ]

    with TempIndex(schema, "columnfield") as ix:
        with ix.writer(codec=W3Codec()) as w:
            for text_value, bytes_value in rows:
                w.add_document(a=u(text_value), b=b(bytes_value))

        with ix.reader() as r:
            for fieldname in ("a", "b"):
                assert r.has_column(fieldname)

            text_column = r.column_reader("a")
            assert text_column[0] == u("alfa bravo")
            assert type(text_column[0]) == text_type

            bytes_column = r.column_reader("b")
            assert bytes_column[0] == b("charlie delta")
            assert type(bytes_column[0]) == bytes_type
# Example no. 5
# 0
def default_codec(*args, **kwargs):
    """Build a ``W3Codec``, forwarding all positional and keyword arguments.

    The codec module is imported inside the function, so the import only
    happens when a codec is actually requested.
    """
    from whoosh.codec import whoosh3

    codec = whoosh3.W3Codec(*args, **kwargs)
    return codec
# Example no. 6
# 0
def test_plaintext_codec():
    """Check that an index rewritten with PlainTextCodec reads back correctly.

    Five documents are first written with ``W3Codec``.  The index is then
    rewritten by an optimizing commit from a writer using ``PlainTextCodec``,
    and the test checks search hits, stored values, field terms, field
    lengths, sortable terms, column values and term vectors against the
    rewritten index.
    """
    # The test is skipped when the "ast" module cannot be imported.
    pytest.importorskip("ast")
    from whoosh.codec.plaintext import PlainTextCodec
    from whoosh.codec.whoosh3 import W3Codec

    ana = analysis.StemmingAnalyzer()
    schema = fields.Schema(a=fields.TEXT(vector=True, sortable=True),
                           b=fields.STORED,
                           c=fields.NUMERIC(stored=True, sortable=True),
                           d=fields.TEXT(analyzer=ana, spelling=True))

    st = RamStorage()
    ix = st.create_index(schema)
    with ix.writer(codec=W3Codec()) as w:
        w.add_document(a=u("alfa bravo charlie"), b="hello", c=100,
                       d=u("quelling whining echoing"))
        w.add_document(a=u("bravo charlie delta"), b=1000, c=200,
                       d=u("rolling timing yelling"))
        w.add_document(a=u("charlie delta echo"), b=5.5, c=300,
                       d=u("using opening pulling"))
        w.add_document(a=u("delta echo foxtrot"), b=True, c=-100,
                       d=u("aching selling dipping"))
        w.add_document(a=u("echo foxtrot india"), b=None, c=-200,
                       d=u("filling going hopping"))

    # Sanity check on the W3Codec segment before it is rewritten.
    with ix.reader() as r:
        assert r.has_column("a")
        c = r.column_reader("a")
        assert c[2] == u("charlie delta echo")

    # An optimizing commit with no new documents rewrites the index using
    # the writer's codec.
    w = ix.writer(codec=PlainTextCodec())
    w.commit(optimize=True)

    with ix.searcher() as s:
        reader = s.reader()

        # Search results and stored values.
        r = s.search(query.Term("a", "delta"))
        assert len(r) == 3
        assert [hit["b"] for hit in r] == [1000, 5.5, True]

        # Term dictionary and per-document field length.
        assert (" ".join(s.field_terms("a"))
                == "alfa bravo charlie delta echo foxtrot india")

        assert reader.doc_field_length(2, "a") == 3

        # Sortable terms of the numeric field decode to the sorted values.
        cfield = schema["c"]
        # Was ``assert type(cfield), fields.NUMERIC``: an assert with a
        # message, which could never fail.  Do a real type check instead.
        assert isinstance(cfield, fields.NUMERIC)
        sortables = list(cfield.sortable_terms(reader, "c"))
        assert sortables
        assert ([cfield.from_bytes(t) for t in sortables]
                == [-200, -100, 100, 200, 300])

        # Column values are still readable after the rewrite.
        assert reader.has_column("a")
        c = reader.column_reader("a")
        assert c[2] == u("charlie delta echo")

        assert reader.has_column("c")
        c = reader.column_reader("c")
        assert list(c) == [100, 200, 300, -100, -200]

        # Term vectors are still readable after the rewrite.
        assert s.has_vector(2, "a")
        v = s.vector(2, "a")
        assert " ".join(v.all_ids()) == "charlie delta echo"