コード例 #1
0
def test_varbytes_offsets():
    """VarBytesColumn must round-trip values with offsets off and on.

    With allow_offsets=False the raw column reports no stored offsets;
    with allow_offsets=True and write_offsets_cutoff=4096 the 5000-row
    column does store offsets. Either way, random lookups must return
    the original values.
    """
    values = u("alfa bravo charlie delta echo foxtrot golf hotel").split()
    vlen = len(values)

    # Without offsets:
    col = columns.VarBytesColumn(allow_offsets=False)
    schema = fields.Schema(name=fields.ID(sortable=col))
    with TempIndex(schema) as ix:
        with ix.writer() as w:
            for i in xrange(5000):
                w.add_document(name=values[i % vlen])

        with ix.reader() as r:
            cr = r.column_reader("name")
            assert isinstance(cr, columns.TranslatingColumnReader)
            assert not cr.raw_column().had_stored_offsets
            for i in (10, 100, 1000, 3000):
                assert cr[i] == values[i % vlen]

    # With offsets
    col = columns.VarBytesColumn(allow_offsets=True, write_offsets_cutoff=4096)
    schema = fields.Schema(name=fields.ID(sortable=col))
    with TempIndex(schema) as ix:
        with ix.writer() as w:
            for i in xrange(5000):
                w.add_document(name=values[i % vlen])

        with ix.reader() as r:
            cr = r.column_reader("name")
            assert isinstance(cr, columns.TranslatingColumnReader)
            assert cr.raw_column().had_stored_offsets
            for i in (10, 100, 1000, 3000):
                assert cr[i] == values[i % vlen]
コード例 #2
0
def test_delete_nonexistant():
    """delete_document() with an out-of-range docnum must raise
    IndexingError, for both a single-segment and a multi-segment index.

    The writer is cancelled in a finally block so a failed expectation
    does not leave the index locked for later tests.
    """
    from whoosh.writing import IndexingError

    schema = fields.Schema(id=fields.ID(stored=True))
    # Single segment
    with TempIndex(schema, "deletenon1") as ix:
        w = ix.writer()
        for char in u"ABC":
            w.add_document(id=char)
        w.commit()

        try:
            w = ix.writer()
            with pytest.raises(IndexingError):
                w.delete_document(5)
        finally:
            w.cancel()

    # Multiple segments: commit each document separately without merging.
    # (Fixed: this index previously reused the name "deletenon1", a
    # copy-paste duplicate of the single-segment case above.)
    with TempIndex(schema, "deletenon2") as ix:
        for char in u"ABC":
            w = ix.writer()
            w.add_document(id=char)
            w.commit(merge=False)

        try:
            w = ix.writer()
            with pytest.raises(IndexingError):
                w.delete_document(5)
        finally:
            w.cancel()
コード例 #3
0
ファイル: test_indexing.py プロジェクト: fortable1999/whoosh
def test_stored_tuple():
    """Tuples stored in a STORED field must round-trip intact, both in a
    single segment and across multiple (unmerged) segments."""
    schema = fields.Schema(a=fields.STORED, b=fields.ID)

    with TempIndex(schema) as ix:
        with ix.writer() as w:
            w.add_document(a=("foo", 20))

        with ix.searcher() as s:
            assert s.stored_fields(0) == {"a": ("foo", 20)}

    with TempIndex(schema) as ix:
        with ix.writer() as w:
            w.add_document(a=("alfa", 1), b=u"a")
            w.add_document(a=("bravo", 2), b=u"b")
            w.add_document(a=("charlie", 3), b=u"c")
            w.add_document(a=("delta", 4), b=u"d")

        # Second writer with merge disabled, so the index keeps two segments
        with ix.writer() as w:
            w.add_document(a=("echo", 5), b=u"e")
            w.add_document(a=("foxtrot", 6), b=u"f")
            w.add_document(a=("golf", 7), b=u"g")
            w.add_document(a=("hotel", 8), b=u"h")
            w.merge = False

        with ix.searcher() as s:
            doc = s.document(b=u"f")
            assert doc["a"] == ("foxtrot", 6)
コード例 #4
0
def test_sort_text_field():
    """Sort by a sortable TEXT field: forward, reversed, and as the second
    level of a MultiFacet under a numeric field — verified on both a
    single-segment and a multi-segment index."""
    domain = (("Visual Display of Quantitative Information, The", 10),
              ("Envisioning Information", 10),
              ("Visual Explanations", 10),
              ("Beautiful Evidence", -10),
              ("Visual and Statistical Thinking", -10),
              ("Cognitive Style of Powerpoint", -10))
    sorted_titles = sorted(d[0] for d in domain)

    schema = fields.Schema(title=fields.TEXT(stored=True, sortable=True),
                           num=fields.NUMERIC(sortable=True))

    def test(ix):
        with ix.searcher() as s:
            # Sort by title
            r = s.search(query.Every(), sortedby="title")
            titles = [hit["title"] for hit in r]
            assert titles == sorted_titles

            # Sort by reverse title
            facet = sorting.FieldFacet("title", reverse=True)
            r = s.search(query.Every(), sortedby=facet)
            assert [hit["title"] for hit in r] == list(reversed(sorted_titles))

            # Sort by num (-10 to 10) first, and within that, by reverse title
            facet = sorting.MultiFacet()
            facet.add_field("num")
            facet.add_field("title", reverse=True)

            r = s.search(query.Every(), sortedby=facet)
            target = ["Visual and Statistical Thinking",
                      "Cognitive Style of Powerpoint",
                      "Beautiful Evidence",
                      "Visual Explanations",
                      "Visual Display of Quantitative Information, The",
                      "Envisioning Information",
                      ]
            assert [hit["title"] for hit in r] == target

    # Single segment
    with TempIndex(schema) as ix:
        with ix.writer() as w:
            for title, num in domain:
                w.add_document(title=u(title), num=num)
        test(ix)

    # Multisegment
    with TempIndex(schema) as ix:
        # Segment 1
        with ix.writer() as w:
            for title, num in domain[:3]:
                w.add_document(title=u(title), num=num)
        # Segment 2
        with ix.writer() as w:
            for title, num in domain[3:]:
                w.add_document(title=u(title), num=num)
            w.merge = False
        test(ix)
コード例 #5
0
def test_column_query():
    """ColumnQuery should match documents by an exact column value or by a
    predicate callable applied to each column value."""
    schema = fields.Schema(id=fields.STORED,
                           a=fields.ID(sortable=True),
                           b=fields.NUMERIC(sortable=True))
    with TempIndex(schema, "columnquery") as ix:
        with ix.writer(codec=W3Codec()) as w:
            w.add_document(id=1, a=u("alfa"), b=10)
            w.add_document(id=2, a=u("bravo"), b=20)
            w.add_document(id=3, a=u("charlie"), b=30)
            w.add_document(id=4, a=u("delta"), b=40)
            w.add_document(id=5, a=u("echo"), b=50)
            w.add_document(id=6, a=u("foxtrot"), b=60)

        with ix.searcher() as s:
            # Resolve a query to the stored "id" values of its matches
            def check(q):
                return [s.stored_fields(docnum)["id"] for docnum in q.docs(s)]

            q = query.ColumnQuery("a", u("bravo"))
            assert check(q) == [2]

            q = query.ColumnQuery("b", 30)
            assert check(q) == [3]

            q = query.ColumnQuery("a", lambda v: v != u("delta"))
            assert check(q) == [1, 2, 3, 5, 6]

            q = query.ColumnQuery("b", lambda v: v > 30)
            assert check(q) == [4, 5, 6]
コード例 #6
0
def test_correct_spell_field():
    """With a stemming analyzer and spelling=True, the index keeps stemmed
    terms in "text" and the unstemmed words in "spell_text";
    correct_query() should suggest from the unstemmed words."""
    ana = analysis.StemmingAnalyzer()
    schema = fields.Schema(text=fields.TEXT(analyzer=ana, spelling=True))
    with TempIndex(schema) as ix:
        with ix.writer() as w:
            w.add_document(text=u"rendering shading modeling reactions")

        with ix.searcher() as s:
            text = s.schema["text"]
            spell_text = s.schema["spell_text"]

            r = s.reader()
            # "text" holds the stemmed forms...
            words = [text.from_bytes(t) for t in r.lexicon("text")]
            assert words == ["model", "reaction", "render", "shade"]

            # ...while "spell_text" holds the original words
            words = [spell_text.from_bytes(t) for t in r.lexicon("spell_text")]
            assert words == ["modeling", "reactions", "rendering", "shading"]

            qp = QueryParser("text", s.schema)
            qtext = u"renderink"
            q = qp.parse(qtext, s.schema)

            # The misspelling matches nothing directly
            r = s.search(q)
            assert len(r) == 0

            c = s.correct_query(q, qtext)
            assert c.string == "rendering"
            assert c.query == query.Term("text", "rendering")

            hf = highlight.HtmlFormatter(classname="c")
            assert c.format_string(
                hf) == '<strong class="c term0">rendering</strong>'
コード例 #7
0
def test_spelling_field():
    """A stemmed field with spelling=True keeps unstemmed words in a
    separate "spell_text" field, which suggest() consults automatically."""
    text = u"rendering shading modeling reactions"
    ana = analysis.StemmingAnalyzer()
    schema = fields.Schema(text=fields.TEXT(analyzer=ana, spelling=True))

    assert schema["text"].spelling
    assert schema["text"].separate_spelling()

    with TempIndex(schema) as ix:
        with ix.writer() as w:
            w.add_document(text=text)

        with ix.searcher() as s:
            r = s.reader()
            fieldobj = schema["text"]
            # Stemmed terms in the main field
            words = [fieldobj.from_bytes(t) for t in r.lexicon("text")]
            assert words == ["model", "reaction", "render", "shade"]

            # Unstemmed words in the parallel spelling field
            words = [fieldobj.from_bytes(t) for t in r.lexicon("spell_text")]
            assert words == ["modeling", "reactions", "rendering", "shading"]

            # suggest() automatically looks in the spell_text field because
            # it calls fieldobj.spelling_fieldname() first
            assert s.suggest("text", "renderink") == ["rendering"]

        with ix.writer() as w:
            w.delete_document(0)
コード例 #8
0
def test_correct_query():
    """correct_query() should replace misspelled terms in a parsed query
    and re-render both the corrected query object and the original query
    string (with highlight formatting)."""
    schema = fields.Schema(a=fields.TEXT(), b=fields.TEXT)
    with TempIndex(schema) as ix:
        with ix.writer() as w:
            w.add_document(a=u"alfa bravo charlie delta")
            w.add_document(a=u"delta echo foxtrot golf")
            w.add_document(a=u"golf hotel india juliet")
            w.add_document(a=u"juliet kilo lima mike")

        with ix.searcher() as s:
            qp = QueryParser("a", ix.schema)
            qtext = u'alpha ("brovo november" OR b:dolta) detail'
            q = qp.parse(qtext, ix.schema)

            c = s.correct_query(q, qtext)
            cq = c.query
            assert isinstance(cq, query.And)
            assert cq[0].text == "alfa"
            assert isinstance(cq[1], query.Or)
            assert isinstance(cq[1][0], query.Phrase)
            # Words inside a phrase get corrected too ("brovo" -> "bravo")
            assert cq[1][0].words == ["bravo", "november"]

            qtext = u'alpha b:("brovo november" a:delta) detail'
            q = qp.parse(qtext, ix.schema)
            c = s.correct_query(q, qtext)
            assert c.query.__unicode__(
            ) == '(a:alfa AND b:"brovo november" AND a:delta AND a:detail)'
            assert c.string == 'alfa b:("brovo november" a:delta) detail'

            hf = highlight.HtmlFormatter(classname="c")
            assert c.format_string(
                hf
            ) == '<strong class="c term0">alfa</strong> b:("brovo november" a:delta) detail'
コード例 #9
0
def test_asyncwriter_no_stored():
    """AsyncWriter should serialize near-simultaneous commits instead of
    failing on the index lock; all 20 documents must make it in."""
    schema = fields.Schema(id=fields.ID, text=fields.TEXT)
    with TempIndex(schema, "asyncnostored") as ix:
        domain = (u"alfa", u"bravo", u"charlie", u"delta", u"echo",
                  u"foxtrot", u"golf", u"hotel", u"india")

        writers = []
        # Simulate doing 20 (near-)simultaneous commits. If we weren't using
        # AsyncWriter, at least some of these would fail because the first
        # writer wouldn't be finished yet.
        for i in xrange(20):
            w = writing.AsyncWriter(ix)
            writers.append(w)
            w.add_document(id=text_type(i),
                           text=u" ".join(random.sample(domain, 5)))
            w.commit()

        # Wait for all writers to finish before checking the results
        for w in writers:
            if w.running:
                w.join()

        # Check whether all documents made it into the index.
        with ix.reader() as r:
            assert sorted([int(id) for id in r.lexicon("id")]) == list(range(20))
コード例 #10
0
def test_add_reader_spelling():
    """Spell words must survive an optimizing merge of two segments."""
    # Test whether add_spell_word() items get copied over in a merge

    # Because b is stemming and spelled, it will use add_spell_word()
    ana = analysis.StemmingAnalyzer()
    schema = fields.Schema(a=fields.TEXT(analyzer=ana),
                           b=fields.TEXT(analyzer=ana, spelling=True))

    with TempIndex(schema, "addreadersp") as ix:
        with ix.writer() as w:
            w.add_document(a=u"rendering modeling",
                           b=u"rendering modeling")
            w.add_document(a=u"flying rolling",
                           b=u"flying rolling")

        # Second writer optimizes, forcing a merge of the two segments
        with ix.writer() as w:
            w.add_document(a=u"writing eyeing",
                           b=u"writing eyeing")
            w.add_document(a=u"undoing indicating",
                           b=u"undoing indicating")
            w.optimize = True

        with ix.reader() as r:
            sws = list(r.lexicon("spell_b"))
            assert sws == [b"eyeing", b"flying", b"indicating", b"modeling",
                           b"rendering", b"rolling",  b"undoing", b"writing"]

            # "a" has no spelling field, so fuzzy lookup uses stemmed terms
            assert list(r.terms_within("a", "undoink", 1)) == []
            assert list(r.terms_within("b", "undoink", 1)) == ["undoing"]
コード例 #11
0
ファイル: test_fields.py プロジェクト: sangensong/whoosh-1
def test_pickle_schema():
    """A schema whose fields carry analyzers (stemming + charset filter)
    must be picklable, both standalone and as obtained from a reader."""
    from whoosh import analysis
    from whoosh.support.charset import accent_map
    from whoosh.compat import dumps

    freetext_analyzer = (analysis.StemmingAnalyzer()
                         | analysis.CharsetFilter(accent_map))

    schema = fields.Schema(path=fields.ID(stored=True, unique=True),
                           file_mtime=fields.DATETIME(stored=True),
                           name=fields.TEXT(stored=False, field_boost=2.0),
                           description=fields.TEXT(stored=False,
                                                   field_boost=1.5,
                                                   analyzer=freetext_analyzer),
                           content=fields.TEXT(analyzer=freetext_analyzer))

    # Try to make some sentences that will require stemming
    docs = [
        u"The rain in spain falls mainly in the plain",
        u"Plainly sitting on the plain",
        u"Imagine a greatly improved sentence here"
    ]

    with TempIndex(schema) as ix:
        with ix.writer() as w:
            for doc in docs:
                w.add_document(description=doc, content=doc)

        # Pickle protocol 2 in both cases
        assert dumps(schema, 2)

        with ix.reader() as r:
            assert dumps(r.schema, 2)
コード例 #12
0
ファイル: test_sorting.py プロジェクト: sangensong/whoosh-1
def test_sort_filter():
    """Sorted search with a filter query must return only the filtered
    documents, in sorted order, with and without a limit — both on a
    multi-segment index and after optimizing down to one segment."""
    schema = fields.Schema(group=fields.ID(stored=True),
                           key=fields.ID(stored=True))
    groups = u("alfa bravo charlie").split()
    keys = u("abcdefghijklmnopqrstuvwxyz")
    source = []
    for i in xrange(100):
        key = keys[i % len(keys)]
        group = groups[i % len(groups)]
        source.append({"key": key, "group": group})
    source.sort(key=lambda x: (x["key"], x["group"]))

    # Index the documents in a shuffled order so sorting is actually tested
    sample = list(source)
    random.shuffle(sample)

    with TempIndex(schema, "sortfilter") as ix:
        w = ix.writer()
        # Commit (without merging) every 26 documents so the index ends up
        # with multiple segments. Count from 1 instead of mutating the
        # enumerate() loop variable.
        for count, fs in enumerate(sample, 1):
            w.add_document(**fs)
            if not count % 26:
                w.commit(merge=False)
                w = ix.writer()
        w.commit()

        fq = query.Term("group", u("bravo"))

        with ix.searcher() as s:
            r = s.search(query.Every(),
                         sortedby=("key", "group"),
                         filter=fq,
                         limit=20)
            assert [h.fields() for h in r
                    ] == [d for d in source if d["group"] == "bravo"][:20]

            fq = query.Term("group", u("bravo"))
            r = s.search(query.Every(),
                         sortedby=("key", "group"),
                         filter=fq,
                         limit=None)
            assert [h.fields() for h in r
                    ] == [d for d in source if d["group"] == "bravo"]

        # Collapse to a single segment and repeat the same checks
        ix.optimize()

        with ix.searcher() as s:
            r = s.search(query.Every(),
                         sortedby=("key", "group"),
                         filter=fq,
                         limit=20)
            assert [h.fields() for h in r
                    ] == [d for d in source if d["group"] == "bravo"][:20]

            fq = query.Term("group", u("bravo"))
            r = s.search(query.Every(),
                         sortedby=("key", "group"),
                         filter=fq,
                         limit=None)
            assert [h.fields() for h in r
                    ] == [d for d in source if d["group"] == "bravo"]
コード例 #13
0
ファイル: test_sorting.py プロジェクト: sangensong/whoosh-1
def test_multifacet():
    """A MultiFacet over two fields should group results by the tuple of
    both field values."""
    schema = fields.Schema(tag=fields.ID(stored=True),
                           size=fields.ID(stored=True))
    with TempIndex(schema, "multifacet") as ix:
        w = ix.writer()
        w.add_document(tag=u("alfa"), size=u("small"))
        w.add_document(tag=u("bravo"), size=u("medium"))
        w.add_document(tag=u("alfa"), size=u("large"))
        w.add_document(tag=u("bravo"), size=u("small"))
        w.add_document(tag=u("alfa"), size=u("medium"))
        w.add_document(tag=u("bravo"), size=u("medium"))
        w.commit()

        # Expected mapping of (tag, size) -> docnums
        correct = {
            (u('bravo'), u('medium')): [1, 5],
            (u('alfa'), u('large')): [2],
            (u('alfa'), u('medium')): [4],
            (u('alfa'), u('small')): [0],
            (u('bravo'), u('small')): [3]
        }

        with ix.searcher() as s:
            facet = sorting.MultiFacet(["tag", "size"])
            r = s.search(query.Every(), groupedby={"tag/size": facet})
            cats = r.groups(("tag/size"))
            assert cats == correct
コード例 #14
0
ファイル: test_sorting.py プロジェクト: sangensong/whoosh-1
def test_page_sorted():
    """search_page() with a sort order: slicing a page past its end yields
    an empty list, and an empty result set reports is_last_page()."""
    schema = fields.Schema(key=fields.ID(stored=True))
    with TempIndex(schema, "pagesorted") as ix:
        domain = list(u("abcdefghijklmnopqrstuvwxyz"))
        random.shuffle(domain)

        w = ix.writer()
        for char in domain:
            w.add_document(key=char)
        w.commit()

        with ix.searcher() as s:
            # Only the top `limit` hits are scored, but len() covers all docs
            r = s.search(query.Every(), sortedby="key", limit=5)
            assert r.scored_length() == 5
            assert len(r) == s.doc_count_all()

            rp = s.search_page(query.Every(), 1, pagelen=5, sortedby="key")
            assert "".join([h["key"] for h in rp]) == "abcde"
            assert rp[10:] == []

            # A query with no matches still pages cleanly
            rp = s.search_page(query.Term("key", "glonk"),
                               1,
                               pagelen=5,
                               sortedby="key")
            assert len(rp) == 0
            assert rp.is_last_page()
コード例 #15
0
ファイル: test_writing.py プロジェクト: CuteCha/dssm-theano
def test_add_reader_spelling():
    """Spell words written by two separate writers must merge into one
    sorted word graph per spelled field."""
    # Test whether add_spell_word() items get copied over in a merge

    # Because b is stemming and spelled, it will use add_spell_word()
    ana = analysis.StemmingAnalyzer()
    schema = fields.Schema(a=fields.TEXT(spelling=True),
                           b=fields.TEXT(analyzer=ana, spelling=True))
    with TempIndex(schema, "addreadersp") as ix:
        with ix.writer() as w:
            w.add_document(a=u("rendering modeling compositing enabling"),
                           b=u("rendering modeling compositing enabling"))
            w.add_document(a=u("flying rolling tying quitting polling"),
                           b=u("flying rolling tying quitting polling"))

        with ix.writer() as w:
            w.add_document(a=u("writing eyeing ripping timing yelling"),
                           b=u("writing eyeing ripping timing yelling"))
            w.add_document(a=u("undoing indicating opening pressing"),
                           b=u("undoing indicating opening pressing"))

        with ix.searcher() as s:
            # Both fields should yield the same alphabetized word list
            gr = s.reader().word_graph("a")
            assert " ".join(gr.flatten_strings()) == (
                "compositing enabling eyeing flying indicating "
                "modeling opening polling pressing quitting "
                "rendering ripping rolling timing tying undoing "
                "writing yelling")

            gr = s.reader().word_graph("b")
            assert " ".join(gr.flatten_strings()) == (
                "compositing enabling eyeing flying indicating "
                "modeling opening polling pressing quitting "
                "rendering ripping rolling timing tying undoing "
                "writing yelling")
コード例 #16
0
ファイル: test_writing.py プロジェクト: CuteCha/dssm-theano
def test_buffered_update():
    """BufferedWriter.update_document() keyed on the unique "id" field:
    after 10 rounds of updates only the last payload per id remains."""
    schema = fields.Schema(id=fields.ID(stored=True, unique=True),
                           payload=fields.STORED)
    with TempIndex(schema, "bufferedupdate") as ix:
        w = writing.BufferedWriter(ix, period=None, limit=5)
        # Update each of a/b/c ten times; only round 9 should survive
        for i in xrange(10):
            for char in u("abc"):
                fs = dict(id=char, payload=text_type(i) + char)
                w.update_document(**fs)

        with w.reader() as r:
            sfs = [sf for _, sf in r.iter_docs()]
            sfs = sorted(sfs, key=lambda x: x["id"])
            assert sfs == [{
                'id': u('a'),
                'payload': u('9a')
            }, {
                'id': u('b'),
                'payload': u('9b')
            }, {
                'id': u('c'),
                'payload': u('9c')
            }]
            assert r.doc_count() == 3

        w.close()
コード例 #17
0
def test_vector_merge():
    """Term vectors must survive a commit that merges segments;
    vector_as("frequency") yields (term, freq) pairs in term order."""
    schema = fields.Schema(title=fields.TEXT,
                           content=fields.TEXT(vector=formats.Frequency()))

    with TempIndex(schema, "vectormerge") as ix:
        writer = ix.writer()
        writer.add_document(title=u("one"),
                            content=u("This is the story of the black hole " +
                                      "story"))
        writer.commit()

        # Second commit (default merge behavior) exercises vector merging
        writer = ix.writer()
        writer.add_document(title=u("two"),
                            content=u("You can read along in your book"))
        writer.commit()

        with ix.searcher() as s:
            r = s.reader()

            # "story" appears twice in document one
            docnum = s.document_number(title=u("one"))
            vec = list(r.vector_as("frequency", docnum, "content"))
            assert vec == [(u('black'), 1), (u('hole'), 1), (u('story'), 2)]

            docnum = s.document_number(title=u("two"))

            vec = list(r.vector_as("frequency", docnum, "content"))
            assert vec == [(u('along'), 1), (u('book'), 1), (u('read'), 1)]
コード例 #18
0
ファイル: test_flexible.py プロジェクト: CuteCha/dssm-theano
def test_removefield():
    """remove_field() should drop fields from the schema and from stored
    documents, while lookups by the remaining fields keep working."""
    schema = fields.Schema(id=fields.ID(stored=True),
                           content=fields.TEXT,
                           city=fields.KEYWORD(stored=True))
    with TempIndex(schema, "removefield") as ix:
        w = ix.writer()
        w.add_document(id=u("b"), content=u("bravo"), city=u("baghdad"))
        w.add_document(id=u("c"), content=u("charlie"), city=u("cairo"))
        w.add_document(id=u("d"), content=u("delta"), city=u("dakar"))
        w.commit()

        with ix.searcher() as s:
            assert s.document(id=u("c")) == {"id": "c", "city": "cairo"}

        w = ix.writer()
        w.remove_field("content")
        w.remove_field("city")
        w.commit()

        # Only "id" remains in the committed schema
        ixschema = ix._current_schema()
        assert ixschema.names() == ["id"]
        assert ixschema.stored_names() == ["id"]

        with ix.searcher() as s:
            # Terms of the removed field are gone, as is its stored value
            assert ("content", b("charlie")) not in s.reader()
            assert s.document(id=u("c")) == {"id": u("c")}
コード例 #19
0
ファイル: test_indexing.py プロジェクト: CuteCha/dssm-theano
def test_globfield_length_merge():
    # Issue 343

    """Field lengths for glob ("*_text") fields must survive a segment
    merge, so doc_field_length() and searching still work afterwards."""
    schema = fields.Schema(title=fields.TEXT(stored=True),
                           path=fields.ID(stored=True))
    schema.add("*_text", fields.TEXT, glob=True)

    with TempIndex(schema, "globlenmerge") as ix:
        with ix.writer() as w:
            w.add_document(
                title=u("First document"),
                path=u("/a"),
                content_text=u("This is the first document we've added!"))

        # Second writer triggers the merge that issue 343 concerned
        with ix.writer() as w:
            w.add_document(
                title=u("Second document"),
                path=u("/b"),
                content_text=u(
                    "The second document is even more interesting!"))

        with ix.searcher() as s:
            docnum = s.document_number(path="/a")
            assert s.doc_field_length(docnum, "content_text") is not None

            qp = qparser.QueryParser("content", schema)
            q = qp.parse("content_text:document")
            r = s.search(q)
            paths = sorted(hit["path"] for hit in r)
            assert paths == ["/a", "/b"]
コード例 #20
0
ファイル: test_spelling.py プロジェクト: CuteCha/dssm-theano
def test_suggest_prefix():
    """suggest() with prefix=N should only offer corrections that share the
    first N characters with the input word."""
    domain = ("Shoot To Kill", "Bloom, Split and Deviate",
              "Rankle the Seas and the Skies", "Lightning Flash Flame Shell",
              "Flower Wind Rage and Flower God Roar, Heavenly Wind Rage and "
              "Heavenly Demon Sneer",
              "All Waves, Rise now and Become my Shield, Lightning, Strike "
              "now and Become my Blade",
              "Cry, Raise Your Head, Rain Without end",
              "Sting All Enemies To Death", "Reduce All Creation to Ash",
              "Sit Upon the Frozen Heavens", "Call forth the Twilight")

    schema = fields.Schema(content=fields.TEXT(stored=True, spelling=True),
                           quick=fields.NGRAM(maxsize=10, stored=True))

    with TempIndex(schema, "sugprefix") as ix:
        with ix.writer() as w:
            for item in domain:
                content = u(item)
                w.add_document(content=content, quick=content)

        with ix.searcher() as s:
            # prefix=2: suggestions must start with "ra"
            sugs = s.suggest("content", u("ra"), maxdist=2, prefix=2)
            assert sugs == ['rage', 'rain']

            # prefix=1: suggestions only need to start with "r"
            sugs = s.suggest("content", "ra", maxdist=2, prefix=1)
            assert sugs == ["rage", "rain", "roar"]
コード例 #21
0
ファイル: test_indexing.py プロジェクト: CuteCha/dssm-theano
def test_noscorables1():
    """Index many documents containing only unscorable (ID/KEYWORD)
    fields, then run a term search to make sure it doesn't blow up."""
    from random import choice, sample, randint

    word_list = [u(word) for word in
                 ("alfa bravo charlie delta echo foxtrot golf hotel "
                  "india juliet kilo lima").split()]
    doc_count = 1000

    schema = fields.Schema(id=fields.ID, tags=fields.KEYWORD)
    with TempIndex(schema, "noscorables1") as ix:
        writer = ix.writer()
        for _ in xrange(doc_count):
            tag_words = sample(word_list, randint(2, 7))
            writer.add_document(id=choice(word_list),
                                tags=u(" ").join(tag_words))
        writer.commit()

        with ix.searcher() as s:
            s.search(query.Term("id", "bravo"))
コード例 #22
0
ファイル: test_indexing.py プロジェクト: CuteCha/dssm-theano
def test_deleteall():
    """Deleting every docnum and optimizing leaves a truly empty index."""
    schema = fields.Schema(text=fields.TEXT)
    with TempIndex(schema, "deleteall") as ix:
        w = ix.writer()
        domain = u("alfa bravo charlie delta echo").split()
        # Commit every 10 docs (no merge arg, so default merging) to build
        # up several segments before deleting
        for i, ls in enumerate(permutations(domain)):
            w.add_document(text=u(" ").join(ls))
            if not i % 10:
                w.commit()
                w = ix.writer()
        w.commit()

        # This is just a test, don't use this method to delete all docs IRL!
        doccount = ix.doc_count_all()
        w = ix.writer()
        for docnum in xrange(doccount):
            w.delete_document(docnum)
        w.commit()

        with ix.searcher() as s:
            r = s.search(
                query.Or([
                    query.Term("text", u("alfa")),
                    query.Term("text", u("bravo"))
                ]))
            assert len(r) == 0

        ix.optimize()
        assert ix.doc_count_all() == 0

        with ix.reader() as r:
            assert list(r) == []
コード例 #23
0
ファイル: test_indexing.py プロジェクト: CuteCha/dssm-theano
def test_update():
    """update_document() on a schema with two unique keys should replace
    the document matching either key."""
    sample_docs = [
        dict(id=u("test%d" % n), path=u("/test/%d" % n), text=u(word))
        for n, word in ((1, "Hello"), (2, "There"), (3, "Reader"))
    ]

    schema = fields.Schema(id=fields.ID(unique=True, stored=True),
                           path=fields.ID(unique=True, stored=True),
                           text=fields.TEXT)

    with TempIndex(schema, "update") as ix:
        with ix.writer() as w:
            for doc in sample_docs:
                w.add_document(**doc)

        # "test2" already exists, so this update replaces that document
        with ix.writer() as w:
            w.update_document(id=u("test2"),
                              path=u("test/1"),
                              text=u("Replacement"))
コード例 #24
0
ファイル: test_indexing.py プロジェクト: CuteCha/dssm-theano
def test_reindex():
    """Running update_document() twice over the same unique ids must not
    create duplicate documents."""
    docs = [
        {"id": u("test1"),
         "text": u("This is a document. Awesome, is it not?")},
        {"id": u("test2"),
         "text": u("Another document. Astounding!")},
        {"id": u("test3"),
         "text": u("A fascinating article on the behavior of domestic "
                   "steak knives.")},
    ]

    schema = fields.Schema(text=fields.TEXT(stored=True),
                           id=fields.ID(unique=True, stored=True))
    with TempIndex(schema, "reindex") as ix:

        def index_all():
            # Each pass upserts every sample document by its unique id
            with ix.writer() as writer:
                for doc in docs:
                    writer.update_document(**doc)

        for _ in range(2):
            index_all()
            assert ix.doc_count_all() == 3
コード例 #25
0
def test_cursor():
    """Exercise the term-cursor API: text(), next(), find(), first(),
    is_valid() and term_info().weight().

    Fixes a latent bug: the check on cur.first() was a bare comparison
    with no assert, so it never actually tested anything.
    """
    schema = fields.Schema(text=fields.TEXT)
    with TempIndex(schema) as ix:
        with ix.writer() as w:
            w.add_document(text=u"papa quebec romeo sierra tango")
            w.add_document(text=u"foxtrot golf hotel india juliet")
            w.add_document(text=u"alfa bravo charlie delta echo")
            w.add_document(text=u"uniform victor whiskey x-ray")
            w.add_document(text=u"kilo lima mike november oskar")
            w.add_document(text=u"charlie alfa alfa bravo bravo bravo")

        with ix.reader() as r:
            cur = r.cursor("text")
            assert cur.text() == "alfa"
            assert cur.next() == "bravo"
            assert cur.text() == "bravo"

            # find() positions the cursor at the first term at or after the
            # given bytes ("india" is the first term >= b"inc")
            assert cur.find(b"inc") == "india"
            assert cur.text() == "india"

            # Was a no-op comparison; the missing assert is restored here
            assert cur.first() == "alfa"
            assert cur.text() == "alfa"

            # Seeking past the last term invalidates the cursor
            assert cur.find(b"zulu") is None
            assert cur.text() is None
            assert not cur.is_valid()

            # weight() reflects total occurrences: "alfa" x3, "bravo" x4,
            # "charlie" x2 across the six documents above
            assert cur.find(b"a") == "alfa"
            assert cur.term_info().weight() == 3
            assert cur.next() == "bravo"
            assert cur.term_info().weight() == 4
            assert cur.next() == "charlie"
            assert cur.term_info().weight() == 2
コード例 #26
0
def test_batchsize_eq_doccount():
    """A multiprocess writer must handle a batchsize exactly equal to the
    number of documents added."""
    check_multi()
    schema = fields.Schema(a=fields.KEYWORD(stored=True))
    with TempIndex(schema) as ix:
        with ix.writer(procs=4, batchsize=10) as writer:
            for number in xrange(10):
                writer.add_document(a=u(str(number)))
コード例 #27
0
ファイル: test_fields.py プロジェクト: sangensong/whoosh-1
def test_pickle_idlist():
    """A schema containing an IDLIST field must open without errors."""
    schema = fields.Schema(pk=fields.ID(stored=True, unique=True),
                           text=fields.TEXT(),
                           tags=fields.IDLIST(stored=True))
    with TempIndex(schema) as ix:
        assert ix
コード例 #28
0
 def check(method):
     # Apply the given indexing strategy to a fresh index, then verify
     # that grouping every document by "tag" yields the expected
     # tag -> docnum lists.
     with TempIndex(get_schema()) as ix:
         method(ix)
         with ix.searcher() as s:
             results = s.search(query.Every(), groupedby="tag")
             groups = results.groups()
             assert sorted(groups.items()) == [(u('one'), [0, 6]),
                                               (u('three'), [1, 3, 7, 8]),
                                               (u('two'), [2, 4, 5])]
コード例 #29
0
def test_delete_recovery():
    """Run the recover writer and reader threads concurrently against the
    same index and wait for both to finish cleanly."""
    schema = fields.Schema(text=fields.TEXT)
    with TempIndex(schema, "delrecover") as ix:
        writer_thread = RecoverWriter(ix)
        reader_thread = RecoverReader(ix)
        writer_thread.start()
        reader_thread.start()
        writer_thread.join()
        reader_thread.join()
コード例 #30
0
def test_no_add():
    """ix.writer(procs=3) should hand back an MpWriter even when no
    documents are ever added."""
    check_multi()
    from whoosh.multiproc import MpWriter

    schema = fields.Schema(text=fields.TEXT(stored=True, spelling=True,
                                            vector=True))
    with TempIndex(schema) as ix:
        with ix.writer(procs=3) as w:
            assert type(w) == MpWriter