Python u Examples, whoosh.compat.u Python Examples

Example #1

0

Show file

File: test_classify.py Project: JunjieHu/dl

def test_keyterms():
    """Key terms for the document at path "/a" must match the expected list.

    Only the first element of each entry produced by ``key_terms`` is
    compared.
    """
    index = create_index()
    with index.searcher() as searcher:
        target = searcher.document_number(path="/a")
        ranked = list(searcher.key_terms([target], "content", numterms=3))
        found = [entry[0] for entry in ranked]
        expected = [u("collision"), u("calculations"), u("damped")]
        assert found == expected

Example #2

0

Show file

File: compound.py Project: Apophus/microblog

 def __unicode__(self):
     """Render as ``DisMax(...)`` over the sorted subquery strings.

     A ``~<tiebreak>`` suffix is appended when ``self.tiebreak`` is truthy.
     """
     opening = u("DisMax(")
     body = " ".join(sorted([text_type(sub) for sub in self.subqueries]))
     text = opening + body + u(")")
     if not self.tiebreak:
         return text
     return text + (u("~") + text_type(self.tiebreak))

Example #3

0

Show file

File: test_parsing.py Project: gsadikin/whoosh

def test_andnot():
    """Exercise ANDNOT parsing alone, next to implicit terms, and with AND."""
    parser = default.QueryParser("content", None)

    # A bare "x ANDNOT y" parses to an AndNot over two terms.
    parsed = parser.parse(u("this ANDNOT that"))
    assert parsed.__class__ == query.AndNot
    positive, negative = parsed.a, parsed.b
    assert positive.__class__ == query.Term
    assert negative.__class__ == query.Term
    assert positive.text == "this"
    assert negative.text == "that"

    # A trailing word ends up as a sibling of the AndNot inside an And.
    parsed = parser.parse(u("foo ANDNOT bar baz"))
    assert parsed.__class__ == query.And
    assert len(parsed) == 2
    first, second = parsed[0], parsed[1]
    assert first.__class__ == query.AndNot
    assert second.__class__ == query.Term

    # Words on both sides: Term, AndNot, Term.
    parsed = parser.parse(u("foo fie ANDNOT bar baz"))
    assert parsed.__class__ == query.And
    assert len(parsed) == 3
    first, second, third = parsed[0], parsed[1], parsed[2]
    assert first.__class__ == query.Term
    assert second.__class__ == query.AndNot
    assert third.__class__ == query.Term

    # Explicit AND on the left side is grouped under the AndNot.
    parsed = parser.parse(u("a AND b ANDNOT c"))
    assert parsed.__class__ == query.AndNot
    rendered = text_type(parsed)
    assert rendered == "((content:a AND content:b) ANDNOT content:c)"

Example #4

0

Show file

File: test_parsing.py Project: gsadikin/whoosh

def test_phrase_andmaybe():
    """ANDMAYBE between a word and a quoted phrase gives AndMaybe(Term, Phrase)."""
    parser = default.QueryParser("f", None)
    parsed = parser.parse(u('Dahmen ANDMAYBE "Besov Spaces"'))
    assert isinstance(parsed, query.AndMaybe)

    left, right = parsed[0], parsed[1]
    assert left == query.Term("f", u("Dahmen"))
    assert right == query.Phrase("f", [u("Besov"), u("Spaces")])

Example #5

0

Show file

File: test_weightings.py Project: JunjieHu/dl

def test_compatibility():
    """Check that an old-style ``Weighting`` subclass is still honoured.

    The subclass overrides ``score`` and ``final`` directly and sets
    ``use_final``. A term search for "bravo" must then report a score of
    2.25 for the first hit.
    """
    from whoosh.scoring import Weighting

    # This is the old way of doing a custom weighting model, check that
    # it's still supported...
    class LegacyWeighting(Weighting):
        use_final = True

        def score(self, searcher, fieldname, text, docnum, weight):
            return weight + 0.5

        def final(self, searcher, docnum, score):
            return score * 1.5

    schema = fields.Schema(text=fields.TEXT)
    ix = RamStorage().create_index(schema)
    w = ix.writer()
    domain = "alfa bravo charlie delta".split()
    for ls in permutations(domain, 3):
        w.add_document(text=u(" ").join(ls))
    w.commit()

    # Use the searcher as a context manager so it is closed even when the
    # assertion below fails.
    with ix.searcher(weighting=LegacyWeighting()) as s:
        r = s.search(query.Term("text", u("bravo")))
        # 2.25 == (1 + 0.5) * 1.5 -- presumably a raw term weight of 1 run
        # through score() and then final().
        assert r.score(0) == 2.25

Example #6

0

Show file

File: compound.py Project: Apophus/microblog

 def __unicode__(self):
     """Render the subqueries joined by ``self.JOINT`` inside parentheses.

     A ``>``-prefixed ``minmatch`` is appended when ``self.minmatch`` is
     truthy.
     """
     opening = u("(")
     rendered = [text_type(sub) for sub in self.subqueries]
     text = opening + (self.JOINT).join(rendered) + u(")")
     if not self.minmatch:
         return text
     return text + u(">%s") % self.minmatch

Example #7

0

Show file

File: test_parsing.py Project: ChimmyTee/oh-mainline

def test_andnot():
    """Exercise ANDNOT parsing alone, next to implicit terms, and with AND."""
    parser = default.QueryParser("content", None)

    # A bare "x ANDNOT y" parses to an AndNot over two terms.
    parsed = parser.parse(u("this ANDNOT that"))
    assert_equal(parsed.__class__, query.AndNot)
    positive, negative = parsed.a, parsed.b
    assert_equal(positive.__class__, query.Term)
    assert_equal(negative.__class__, query.Term)
    assert_equal(positive.text, "this")
    assert_equal(negative.text, "that")

    # A trailing word ends up as a sibling of the AndNot inside an And.
    parsed = parser.parse(u("foo ANDNOT bar baz"))
    assert_equal(parsed.__class__, query.And)
    assert_equal(len(parsed), 2)
    first, second = parsed[0], parsed[1]
    assert_equal(first.__class__, query.AndNot)
    assert_equal(second.__class__, query.Term)

    # Words on both sides: Term, AndNot, Term.
    parsed = parser.parse(u("foo fie ANDNOT bar baz"))
    assert_equal(parsed.__class__, query.And)
    assert_equal(len(parsed), 3)
    first, second, third = parsed[0], parsed[1], parsed[2]
    assert_equal(first.__class__, query.Term)
    assert_equal(second.__class__, query.AndNot)
    assert_equal(third.__class__, query.Term)

    # Explicit AND on the left side is grouped under the AndNot.
    parsed = parser.parse(u("a AND b ANDNOT c"))
    assert_equal(parsed.__class__, query.AndNot)
    rendered = text_type(parsed)
    assert_equal(rendered, "((content:a AND content:b) ANDNOT content:c)")

Example #8

0

Show file

File: test_indexing.py Project: pombredanne/whoosh

def test_deleteall():
    """Delete every document by number and verify the index ends up empty."""
    schema = fields.Schema(text=fields.TEXT)
    with TempIndex(schema, "deleteall") as ix:
        words = u("alfa bravo charlie delta echo").split()
        writer = ix.writer()
        for n, combo in enumerate(permutations(words)):
            writer.add_document(text=u(" ").join(combo))
            # Commit whenever n is a multiple of ten (including n == 0) and
            # carry on with a fresh writer.
            if n % 10 == 0:
                writer.commit()
                writer = ix.writer()
        writer.commit()

        # This is just a test, don't use this method to delete all docs IRL!
        total = ix.doc_count_all()
        writer = ix.writer()
        for docnum in xrange(total):
            writer.delete_document(docnum)
        writer.commit()

        with ix.searcher() as s:
            either = query.Or([query.Term("text", u("alfa")),
                               query.Term("text", u("bravo"))])
            hits = s.search(either)
            assert len(hits) == 0

        ix.optimize()
        assert ix.doc_count_all() == 0

        with ix.reader() as reader:
            assert list(reader) == []

Example #9

0

Show file

File: test_fields.py Project: JunjieHu/dl

def test_boolean_strings():
    """BOOLEAN fields accept "true"/"false" strings in several spellings.

    Documents are numbered 0..7 in ``i``; each query string must return
    exactly the document numbers listed for it.
    """
    schema = fields.Schema(i=fields.STORED, b=fields.BOOLEAN(stored=True))
    ix = RamStorage().create_index(schema)
    spellings = ["true", "True", "false", "False",
                 u("true"), u("True"), u("false"), u("False")]
    with ix.writer() as w:
        for num, spelling in enumerate(spellings):
            w.add_document(i=num, b=spelling)

    with ix.searcher() as s:
        qp = qparser.QueryParser("b", ix.schema)

        def check(qs, nums):
            hits = s.search(qp.parse(qs), limit=None)
            assert [hit["i"] for hit in hits] == nums

        trues = [0, 1, 4, 5]
        falses = [2, 3, 6, 7]
        cases = (("true", trues), ("True", trues),
                 ("false", falses), ("False", falses),
                 ("t", trues), ("f", falses))
        for qs, nums in cases:
            check(qs, nums)

Example #10

0

Show file

File: test_queries.py Project: pombredanne/whoosh-clone

def test_requires():
    """``requires()`` for And, Or, AndMaybe and a bare Term."""
    first = Term("f", u("a"))
    second = Term("f", u("b"))

    conjunction = And([first, second]).requires()
    assert conjunction == set([first, second])

    disjunction = Or([first, second]).requires()
    assert disjunction == set()

    maybe = AndMaybe(first, second).requires()
    assert maybe == set([first])

    assert first.requires() == set([first])

Example #11

0

Show file

File: test_parsing.py Project: ChimmyTee/oh-mainline

def test_colonspace():
    """Check how ``field:`` is parsed with and without text right after it."""
    schema = fields.Schema(content=fields.TEXT, url=fields.ID)
    parser = default.QueryParser("content", schema)

    # No space after the colon: a term in the named field.
    parsed = parser.parse(u("url:test"))
    assert_equal(parsed.__class__, query.Term)
    assert_equal(parsed.fieldname, "url")
    assert_equal(parsed.text, "test")

    # Space after the colon: two terms in the default field.
    parsed = parser.parse(u("url: test"))
    assert_equal(parsed.__class__, query.And)
    left, right = parsed[0], parsed[1]
    assert_equal(left.__class__, query.Term)
    assert_equal(right.__class__, query.Term)
    assert_equal(left.fieldname, "content")
    assert_equal(right.fieldname, "content")
    assert_equal(left.text, "url")
    assert_equal(right.text, "test")

    # Nothing after the colon: a single term in the default field.
    parsed = parser.parse(u("url:"))
    assert_equal(parsed.__class__, query.Term)
    assert_equal(parsed.fieldname, "content")
    assert_equal(parsed.text, "url")

    # With a KEYWORD default field and a name that is not in the schema,
    # the colon stays in the term text.
    schema = fields.Schema(foo=fields.KEYWORD)
    parser = default.QueryParser("foo", schema)
    parsed = parser.parse(u("blah:"))
    assert_equal(parsed.__class__, query.Term)
    assert_equal(parsed.fieldname, "foo")
    assert_equal(parsed.text, "blah:")

Example #12

0

Show file

File: test_fields.py Project: JunjieHu/dl

def test_datetime():
    """Index DATETIME values and query them by day, by month and by range."""
    date_field = fields.DATETIME(stored=True)
    schema = fields.Schema(id=fields.ID(stored=True), date=date_field)
    ix = RamStorage().create_index(schema)

    # One document per (month, day) for months 1..11 and days 1..27,
    # all at 14:00:00 in 2010.
    writer = ix.writer()
    for month in xrange(1, 12):
        for day in xrange(1, 28):
            label = u("%s-%s") % (month, day)
            stamp = datetime(2010, month, day, 14, 0, 0)
            writer.add_document(id=label, date=stamp)
    writer.commit()

    with ix.searcher() as s:
        qp = qparser.QueryParser("id", schema)

        # A full date matches exactly one document.
        hits = s.search(qp.parse("date:20100523"))
        assert len(hits) == 1
        top = hits[0]
        assert top["id"] == "5-23"
        assert top["date"].__class__ is datetime
        assert top["date"].month == 5
        assert top["date"].day == 23

        # A year and month matches the 27 documents indexed for that month.
        hits = s.search(qp.parse("date:'2010 02'"))
        assert len(hits) == 27

        # A month range becomes a NumericRange from the first instant of
        # May to the last microsecond of August.
        q = qp.parse(u("date:[2010-05 to 2010-08]"))
        range_start = datetime(2010, 5, 1, 0, 0, 0, 0)
        range_end = datetime(2010, 8, 31, 23, 59, 59, 999999)
        assert q.__class__ is query.NumericRange
        assert q.start == times.datetime_to_long(range_start)
        assert q.end == times.datetime_to_long(range_end)

Example #13

0

Show file

File: test_fields.py Project: JunjieHu/dl

def test_boolean():
    """Query a BOOLEAN field with true/yes and false/no."""
    schema = fields.Schema(id=fields.ID(stored=True),
                           done=fields.BOOLEAN(stored=True))
    ix = RamStorage().create_index(schema)

    rows = (("a", True), ("b", False), ("c", True), ("d", False), ("e", True))
    w = ix.writer()
    for ident, flag in rows:
        w.add_document(id=u(ident), done=flag)
    w.commit()

    with ix.searcher() as s:
        qp = qparser.QueryParser("id", schema)

        # "true" and "yes" both select the documents stored with done=True.
        for qs in ("done:true", "done:yes"):
            hits = s.search(qp.parse(qs))
            assert sorted([d["id"] for d in hits]) == ["a", "c", "e"]
            assert all(d["done"] for d in hits)

        # "false" parses to a Term whose text is the bool False itself.
        q = qp.parse("done:false")
        assert q.__class__ == query.Term
        assert q.text is False
        assert schema["done"].to_bytes(False) == b("f")
        hits = s.search(q)
        assert sorted([d["id"] for d in hits]) == ["b", "d"]
        assert not any(d["done"] for d in hits)

        hits = s.search(qp.parse("done:no"))
        assert sorted([d["id"] for d in hits]) == ["b", "d"]
        assert not any(d["done"] for d in hits)

Example #14

0

Show file

File: test_analysis.py Project: ChimmyTee/oh-mainline

def test_substitution():
    """SubstitutionFilter rewrites token text, including with group references."""
    # Plain replacement: drop every hyphen.
    dehyphenate = (analysis.RegexTokenizer(r"\S+")
                   | analysis.SubstitutionFilter("-", ""))
    texts = [t.text for t in dehyphenate(u("one-two th-re-ee four"))]
    assert_equal(texts, ["onetwo", "threee", "four"])

    # Backreferences: swap the two sides of "key=value".
    swap = (analysis.RegexTokenizer(r"\S+")
            | analysis.SubstitutionFilter("([^=]*)=(.*)", r"\2=\1"))
    texts = [t.text for t in swap(u("a=b c=d ef"))]
    assert_equal(texts, ["b=a", "d=c", "ef"])

Example #15

0

Show file

File: test_queries.py Project: ChimmyTee/oh-mainline

def test_requires():
    """``requires()`` for And, Or, AndMaybe and a bare Term."""
    first = Term("f", u("a"))
    second = Term("f", u("b"))

    conjunction = And([first, second]).requires()
    assert_equal(conjunction, set([first, second]))

    disjunction = Or([first, second]).requires()
    assert_equal(disjunction, set())

    maybe = AndMaybe(first, second).requires()
    assert_equal(maybe, set([first]))

    assert_equal(first.requires(), set([first]))

Example #16

0

Show file

File: terms.py Project: 32footsteps/SpecialCollectionsProject

 def __unicode__(self):
     """Render as ``fieldname:text~`` with optional distance and boost.

     The distance is appended only when ``maxdist`` exceeds 1, and
     ``^boost`` only when ``boost`` differs from 1.0.
     """
     parts = [u("%s:%s") % (self.fieldname, self.text), u("~")]
     if self.maxdist > 1:
         parts.append(u("%d") % self.maxdist)
     if self.boost != 1.0:
         parts.append(u("^%f") % self.boost)
     return u("").join(parts)

Example #17

0

Show file

File: test_searching.py Project: ChimmyTee/oh-mainline

def test_missing_field_scoring():
    """Field lengths and searching still work when a document omits a field."""
    schema = fields.Schema(name=fields.TEXT(stored=True),
                           hobbies=fields.TEXT(stored=True))
    ix = RamStorage().create_index(schema)

    # First document fills both fields.
    w = ix.writer()
    w.add_document(name=u('Frank'), hobbies=u('baseball, basketball'))
    w.commit()
    reader = ix.reader()
    for fieldname, length in (("hobbies", 2), ("name", 1)):
        assert_equal(reader.field_length(fieldname), length)
    reader.close()

    # Second document has no "hobbies" value at all.
    w = ix.writer()
    w.add_document(name=u('Jonny'))
    w.commit()

    with ix.searcher() as s:
        reader = s.reader()
        assert_equal(len(ix._segments()), 1)
        for fieldname, length in (("hobbies", 2), ("name", 2)):
            assert_equal(reader.field_length(fieldname), length)

        parser = qparser.MultifieldParser(['name', 'hobbies'], schema)
        hits = s.search(parser.parse(u("baseball")))
        assert_equal(len(hits), 1)

Example #18

0

Show file

File: terms.py Project: intabeta/inta

 def __unicode__(self):
     """Render as ``text~`` with optional distance and boost.

     The distance is appended only when ``maxdist`` exceeds 1, and
     ``^boost`` only when ``boost`` differs from 1.0.
     """
     suffix = u("~")
     if self.maxdist > 1:
         suffix = suffix + u("%d") % self.maxdist
     if self.boost != 1.0:
         suffix = suffix + u("^%f") % self.boost
     return self.text + suffix

Example #19

0

Show file

File: test_searching.py Project: ChimmyTee/oh-mainline

def test_boost_phrase():
    """Boosting Phrase nodes by 1000 lifts matching hits above a score of 100."""
    schema = fields.Schema(title=fields.TEXT(field_boost=5.0, stored=True),
                           text=fields.TEXT)
    ix = RamStorage().create_index(schema)
    words = u("alfa bravo charlie delta").split()
    w = ix.writer()
    for combo in permutations(words):
        joined = u(" ").join(combo)
        w.add_document(title=joined, text=joined)
    w.commit()

    title_terms = [Term("title", u("alfa")), Term("title", u("bravo"))]
    phrase = Phrase("text", [u("bravo"), u("charlie"), u("delta")])
    q = Or(title_terms + [phrase])

    def amplify(node):
        # Non-phrase nodes are walked through apply(); phrase nodes get
        # their boost multiplied in place.
        if not isinstance(node, Phrase):
            return node.apply(amplify)
        node.boost *= 1000.0
        return node

    q = amplify(q)

    with ix.searcher() as s:
        for hit in s.search(q, limit=None):
            if "bravo charlie delta" in hit["title"]:
                assert hit.score > 100.0

Example #20

0

Show file

File: test_writing.py Project: ChimmyTee/oh-mainline

def test_fractional_weights():
    """``word^weight`` tokens keep their fractional weights in the postings.

    The same check runs for a default TEXT field and for one created with
    ``phrase=False``.
    """
    ana = analysis.RegexTokenizer(r"\S+") | analysis.DelimitedAttributeFilter()

    def indexed_weights(fieldtype):
        # Index a single weighted document into field "f" and return the
        # posting weight of every term in that field's lexicon.
        ix = RamStorage().create_index(fields.Schema(f=fieldtype))
        w = ix.writer()
        w.add_document(f=u("alfa^0.5 bravo^1.5 charlie^2.0 delta^1.5"))
        w.commit()
        with ix.searcher() as s:
            return [s.postings("f", word).weight() for word in s.lexicon("f")]

    # With Positions format
    wts = indexed_weights(fields.TEXT(analyzer=ana))
    assert_equal(wts, [0.5, 1.5, 2.0, 1.5])

    # Try again with Frequency format
    wts = indexed_weights(fields.TEXT(analyzer=ana, phrase=False))
    assert_equal(wts, [0.5, 1.5, 2.0, 1.5])

Example #21

0

Show file

File: test_writing.py Project: ChimmyTee/oh-mainline

def test_delete_nonexistant():
    """Deleting a document number that does not exist raises IndexingError.

    Checked once against an index written with a single commit and once
    against an index built from three separate ``merge=False`` commits.
    Each index holds three documents, and document number 5 is deleted.
    """
    from whoosh.writing import IndexingError

    schema = fields.Schema(id=fields.ID(stored=True))
    # Single segment
    with TempIndex(schema, "deletenon1") as ix:
        w = ix.writer()
        for char in u("ABC"):
            w.add_document(id=char)
        w.commit()

        # Acquire the writer before entering the try block: if writer()
        # itself raised, the finally clause would otherwise call cancel()
        # on the previous, already committed writer.
        w = ix.writer()
        try:
            assert_raises(IndexingError, w.delete_document, 5)
        finally:
            w.cancel()

    # Multiple segments
    with TempIndex(schema, "deletenon1") as ix:
        for char in u("ABC"):
            w = ix.writer()
            w.add_document(id=char)
            w.commit(merge=False)

        # Same ordering as above: only a writer that was really opened
        # gets cancelled.
        w = ix.writer()
        try:
            assert_raises(IndexingError, w.delete_document, 5)
        finally:
            w.cancel()

Example #22

0

Show file

File: test_parse_plugins.py Project: ChimmyTee/oh-mainline

def test_gtlt():
    """Check parsing of ``>`` and ``<=`` after a field name with GtLtPlugin."""
    schema = fields.Schema(a=fields.KEYWORD, b=fields.NUMERIC,
                           c=fields.KEYWORD,
                           d=fields.NUMERIC(float), e=fields.DATETIME)
    parser = qparser.QueryParser("a", schema)
    parser.add_plugin(plugins.GtLtPlugin())
    parser.add_plugin(dateparse.DateParserPlugin())

    # Numeric and keyword fields get range queries.
    parsed = parser.parse(u("a:hello b:>100 c:<=z there"))
    assert_equal(parsed.__class__, query.And)
    assert_equal(len(parsed), 4)
    expected = [
        query.Term("a", "hello"),
        query.NumericRange("b", 100, None, startexcl=True),
        query.TermRange("c", None, 'z'),
        query.Term("a", "there"),
    ]
    for position, wanted in enumerate(expected):
        assert_equal(parsed[position], wanted)

    # Date field with a quoted date.
    parsed = parser.parse(u("hello e:>'29 mar 2001' there"))
    assert_equal(parsed.__class__, query.And)
    assert_equal(len(parsed), 3)
    assert_equal(parsed[0], query.Term("a", "hello"))
    # As of this writing, date ranges don't support startexcl/endexcl
    open_ended = query.DateRange("e", datetime(2001, 3, 29, 0, 0), None)
    assert_equal(parsed[1], open_ended)
    assert_equal(parsed[2], query.Term("a", "there"))

    # A space after the operator: everything is parsed as plain terms.
    parsed = parser.parse(u("a:> alfa c:<= bravo"))
    assert_equal(text_type(parsed), "(a:a: AND a:alfa AND a:c: AND a:bravo)")

    # With the fields and range plugins removed, no range query is built.
    parser.remove_plugin_class(plugins.FieldsPlugin)
    parser.remove_plugin_class(plugins.RangePlugin)
    parsed = parser.parse(u("hello a:>500 there"))
    assert_equal(text_type(parsed), "(a:hello AND a:a: AND a:500 AND a:there)")

Example #23

0

Show file

File: test_parse_plugins.py Project: ChimmyTee/oh-mainline

def test_pseudofield():
    """PseudoFieldPlugin hands ``name:`` nodes to a user-supplied function."""
    schema = fields.Schema(a=fields.KEYWORD, b=fields.TEXT)

    def regex_maker(node):
        # Nodes without text are not replaced (None is returned).
        if not node.has_text:
            return None
        replacement = qparser.RegexPlugin.RegexNode(node.text)
        replacement.set_fieldname("content")
        return replacement

    parser = qparser.QueryParser("a", schema)
    parser.add_plugin(qparser.PseudoFieldPlugin({"regex": regex_maker}))
    parsed = parser.parse(u("alfa regex:br.vo"))
    assert_equal(parsed.__unicode__(), '(a:alfa AND content:r"br.vo")')

    def rev_text(node):
        if not node.has_text:
            return None
        # Build a word node holding the reversed text and give it the
        # original node's start and end char.
        mirrored = qparser.WordNode(node.text[::-1])
        mirrored.set_range(node.startchar, node.endchar)

        # Original and reversed node go side by side in an OrGroup.
        either = qparser.OrGroup([node, mirrored])

        # Need to set the fieldname here because the PseudoFieldPlugin
        # removes the field name syntax
        either.set_fieldname("reverse")
        return either

    parser = qparser.QueryParser("content", schema)
    parser.add_plugin(qparser.PseudoFieldPlugin({"reverse": rev_text}))
    parsed = parser.parse(u("alfa reverse:bravo"))
    assert_equal(parsed.__unicode__(),
                 '(content:alfa AND (reverse:bravo OR reverse:ovarb))')

Example #24

0

Show file

File: test_results.py Project: pombredanne/whoosh-clone

def test_hit_column():
    """A hit exposes a field value only when the field is sortable here.

    The plain TEXT field raises KeyError on access. The ``sortable=True``
    field, written with ``W3Codec``, returns the original text.
    """
    body = "alfa bravo charlie"

    # Not stored
    schema = fields.Schema(text=fields.TEXT())
    ix = RamStorage().create_index(schema)
    with ix.writer() as w:
        w.add_document(text=u(body))

    with ix.searcher() as s:
        hits = s.search(query.Term("text", "alfa"))
        assert len(hits) == 1
        hit = hits[0]
        with pytest.raises(KeyError):
            _ = hit["text"]

    # With column
    schema = fields.Schema(text=fields.TEXT(sortable=True))
    ix = RamStorage().create_index(schema)
    with ix.writer(codec=W3Codec()) as w:
        w.add_document(text=u(body))

    with ix.searcher() as s:
        hits = s.search(query.Term("text", "alfa"))
        assert len(hits) == 1
        hit = hits[0]
        assert hit["text"] == u(body)

Example #25

0

Show file

File: test_sorting.py Project: JunjieHu/dl

def test_sorting_function():
    """Sort results with a FunctionFacet computed from term vectors.

    The first ten results must each contain "alfa" and "bravo" the same
    number of times.
    """
    schema = fields.Schema(id=fields.STORED,
                           text=fields.TEXT(stored=True, vector=True))
    ix = RamStorage().create_index(schema)
    domain = ("alfa", "bravo", "charlie")

    # Every four-word sequence over the domain, with ids counting from 1.
    sequences = [(w1, w2, w3, w4)
                 for w1 in domain
                 for w2 in domain
                 for w3 in domain
                 for w4 in domain]
    w = ix.writer()
    for position, words in enumerate(sequences):
        w.add_document(id=position + 1, text=u(" ").join(words))
    w.commit()

    def fn(searcher, docnum):
        freqs = dict(searcher.vector_as("frequency", docnum, "text"))
        # Sort documents that have equal number of "alfa"
        # and "bravo" first
        gap = abs(freqs.get("alfa", 0) - freqs.get("bravo", 0))
        return 0 - 1.0 / (gap + 1.0)

    fnfacet = sorting.FunctionFacet(fn)

    with ix.searcher() as s:
        q = query.And([query.Term("text", u("alfa")),
                       query.Term("text", u("bravo"))])
        results = s.search(q, sortedby=fnfacet)
        texts = [hit["text"] for hit in results]
        for text in texts[:10]:
            tokens = text.split()
            assert tokens.count("alfa") == tokens.count("bravo")

Example #26

0

Show file

File: test_sorting.py Project: JunjieHu/dl

def test_compound_sort():
    """Sort by three facets: a ascending, b reversed, c ascending."""
    fspec = fields.KEYWORD(stored=True, sortable=True)
    schema = fields.Schema(a=fspec, b=fspec, c=fspec)
    ix = RamStorage().create_index(schema)

    alist = u("alfa bravo alfa bravo alfa bravo alfa bravo alfa bravo").split()
    blist = u("alfa bravo charlie alfa bravo charlie alfa bravo charlie alfa").split()
    clist = u("alfa bravo charlie delta echo foxtrot golf hotel india juliet").split()
    assert all(len(ls) == 10 for ls in (alist, blist, clist))

    # The three columns are the same length (asserted above), so zipping
    # them yields the same ten rows as indexing 0..9.
    with ix.writer() as w:
        for a_val, b_val, c_val in zip(alist, blist, clist):
            w.add_document(a=a_val, b=b_val, c=c_val)

    with ix.searcher() as s:
        facets = [sorting.FieldFacet("a"),
                  sorting.FieldFacet("b", reverse=True),
                  sorting.FieldFacet("c")]
        hits = s.search(query.Every(), sortedby=facets)
        output = [" ".join((hit["a"], hit["b"], hit["c"])) for hit in hits]

        assert output == [
            "alfa charlie charlie",
            "alfa charlie india",
            "alfa bravo echo",
            "alfa alfa alfa",
            "alfa alfa golf",
            "bravo charlie foxtrot",
            "bravo bravo bravo",
            "bravo bravo hotel",
            "bravo alfa delta",
            "bravo alfa juliet",
        ]

Example #27

0

Show file

File: test_sorting.py Project: JunjieHu/dl

def test_overlapping_lists():
    """Group by a multi-valued KEYWORD field with ``allow_overlap=True``.

    Each document must appear in the group of every tag it carries. This is
    checked with an explicit FieldFacet and with a Facets object.
    """
    schema = fields.Schema(id=fields.STORED, tags=fields.KEYWORD)
    ix = RamStorage().create_index(schema)
    tag_sets = ("alfa bravo charlie",
                "bravo charlie delta",
                "charlie delta echo",
                "delta echo alfa",
                "echo alfa bravo")
    with ix.writer() as w:
        for docid, tags in enumerate(tag_sets):
            w.add_document(id=docid, tags=u(tags))

    with ix.searcher() as s:
        facet = sorting.FieldFacet("tags", allow_overlap=True)
        categorizer = facet.categorizer(s)
        assert not categorizer._use_vectors

        # Via a facet object passed in a dict
        r = s.search(query.Every(), groupedby={"tags": facet})
        assert r.groups("tags") == {
            "alfa": [0, 3, 4],
            "bravo": [0, 1, 4],
            "charlie": [0, 1, 2],
            "delta": [1, 2, 3],
            "echo": [2, 3, 4],
        }

        # Via a Facets collection
        facets = sorting.Facets()
        facets.add_field("tags", allow_overlap=True)
        r = s.search(query.Every(), groupedby=facets)
        assert r.groups("tags") == {
            "alfa": [0, 3, 4],
            "bravo": [0, 1, 4],
            "charlie": [0, 1, 2],
            "delta": [1, 2, 3],
            "echo": [2, 3, 4],
        }

Example #28

0

Show file

File: test_sorting.py Project: JunjieHu/dl

 def check(method):
     """Populate a temp index via ``method(ix)`` and check grouping by "tag"."""
     with TempIndex(get_schema()) as ix:
         method(ix)
         with ix.searcher() as s:
             grouped = s.search(query.Every(), groupedby="tag").groups()
             pairs = sorted(grouped.items())
             assert pairs == [(u("one"), [0, 6]),
                              (u("three"), [1, 3, 7, 8]),
                              (u("two"), [2, 4, 5])]

Example #29

0

Show file

File: test_reading.py Project: JunjieHu/dl

def test_term_inspection():
    """Check the reader's term-inspection methods on a two-document index.

    Covers ``field_terms``, ``expand_prefix``, ``all_terms``,
    ``iter_field``, ``most_frequent_terms`` and ``most_distinctive_terms``.
    """
    schema = fields.Schema(title=fields.TEXT(stored=True),
                           content=fields.TEXT)
    st = RamStorage()
    ix = st.create_index(schema)
    writer = ix.writer()
    writer.add_document(title=u("My document"),
                        content=u("AA AA BB BB CC AA AA AA BB BB CC DD EE EE"))
    writer.add_document(title=u("My other document"),
                        content=u("AA AB BB CC EE EE AX AX DD"))
    writer.commit()

    reader = ix.reader()
    # Close the reader even when one of the assertions below fails.
    try:
        assert " ".join(reader.field_terms("content")) == "aa ab ax bb cc dd ee"
        assert list(reader.expand_prefix("content", "a")) == [b('aa'), b('ab'), b('ax')]
        assert set(reader.all_terms()) == set([('content', b('aa')), ('content', b('ab')),
                                               ('content', b('ax')), ('content', b('bb')),
                                               ('content', b('cc')), ('content', b('dd')),
                                               ('content', b('ee')), ('title', b('document')),
                                               ('title', b('my')), ('title', b('other'))])
        # (text, doc_freq, index_freq)
        assert _fstats(reader.iter_field("content")) == [(b('aa'), 2, 6), (b('ab'), 1, 1), (b('ax'), 1, 2),
                                                         (b('bb'), 2, 5), (b('cc'), 2, 3), (b('dd'), 2, 2),
                                                         (b('ee'), 2, 4)]
        assert _fstats(reader.iter_field("content", prefix="c")) == [(b('cc'), 2, 3), (b('dd'), 2, 2), (b('ee'), 2, 4)]
        assert list(reader.most_frequent_terms("content")) == [(6, b('aa')), (5, b('bb')), (4, b('ee')), (3, b('cc')), (2, b('dd'))]
        assert list(reader.most_frequent_terms("content", prefix="a")) == [(6, b('aa')), (2, b('ax')), (1, b('ab'))]
        assert list(reader.most_distinctive_terms("content", 3)) == [(1.3862943611198906, b('ax')), (0.6931471805599453, b('ab')), (0.0, b('ee'))]
    finally:
        reader.close()

Example #30

0

Show file

File: test_spelling.py Project: JunjieHu/dl

def test_suggest_prefix():
    """``suggest`` with ``prefix=2`` and ``prefix=1`` gives different results."""
    titles = (
        "Shoot To Kill",
        "Bloom, Split and Deviate",
        "Rankle the Seas and the Skies",
        "Lightning Flash Flame Shell",
        "Flower Wind Rage and Flower God Roar, Heavenly Wind Rage and "
        "Heavenly Demon Sneer",
        "All Waves, Rise now and Become my Shield, Lightning, Strike "
        "now and Become my Blade",
        "Cry, Raise Your Head, Rain Without end",
        "Sting All Enemies To Death",
        "Reduce All Creation to Ash",
        "Sit Upon the Frozen Heavens",
        "Call forth the Twilight",
    )

    schema = fields.Schema(content=fields.TEXT(stored=True, spelling=True),
                           quick=fields.NGRAM(maxsize=10, stored=True))

    with TempIndex(schema, "sugprefix") as ix:
        with ix.writer() as w:
            for title in titles:
                text = u(title)
                w.add_document(content=text, quick=text)

        with ix.searcher() as s:
            # prefix=2: "roar" is not among the suggestions
            two_char = s.suggest("content", u("ra"), maxdist=2, prefix=2)
            assert two_char == ['rage', 'rain']

            # prefix=1: "roar" is suggested as well
            one_char = s.suggest("content", "ra", maxdist=2, prefix=1)
            assert one_char == ["rage", "rain", "roar"]

Example #31

0

Show file

def test_variations():
    """Run a Variations query for "render" on field "value" via ``_run_query``."""
    q = query.Variations("value", u("render"))
    expected = [u("A"), u("C"), u("E")]
    _run_query(q, expected)

Example #32

0

Show file

def test_find_missing():
    """``NOT id:*`` returns the documents that were added without an ``id``."""
    schema = fields.Schema(id=fields.ID, text=fields.KEYWORD(stored=True))
    ix = RamStorage().create_index(schema)

    # Documents 3, 5 and 7 are added without an id.
    docs = [
        dict(id=u("1"), text=u("alfa")),
        dict(id=u("2"), text=u("bravo")),
        dict(text=u("charlie")),
        dict(id=u("4"), text=u("delta")),
        dict(text=u("echo")),
        dict(id=u("6"), text=u("foxtrot")),
        dict(text=u("golf")),
    ]
    w = ix.writer()
    for doc in docs:
        w.add_document(**doc)
    w.commit()

    with ix.searcher() as s:
        qp = qparser.QueryParser("text", schema)
        hits = s.search(qp.parse(u("NOT id:*")), limit=None)
        texts = [h["text"] for h in hits]
        assert_equal(texts, ["charlie", "echo", "golf"])

Example #33

0

Show file

def test_missing_wildcard():
    """``Every(fieldname)`` matches only documents with a value in that field."""
    schema = fields.Schema(id=fields.ID(stored=True), f1=fields.TEXT,
                           f2=fields.TEXT)
    ix = RamStorage().create_index(schema)

    docs = [
        dict(id=u("1"), f1=u("alfa"), f2=u("apple")),
        dict(id=u("2"), f1=u("bravo")),
        dict(id=u("3"), f1=u("charlie"), f2=u("candy")),
        dict(id=u("4"), f2=u("donut")),
        dict(id=u("5")),
    ]
    w = ix.writer()
    for doc in docs:
        w.add_document(**doc)
    w.commit()

    expectations = (
        ("id", ["1", "2", "3", "4", "5"]),
        ("f1", ["1", "2", "3"]),
        ("f2", ["1", "3", "4"]),
    )
    with ix.searcher() as s:
        for fieldname, ids in expectations:
            hits = s.search(query.Every(fieldname))
            assert_equal(sorted([d['id'] for d in hits]), ids)

Example #34

0

Show file

def test_wildcard():
    """Wildcard queries: an Or of two patterns, then one with no matches."""
    red_values = query.Wildcard('value', u('*red*'))
    yellow_names = query.Wildcard('name', u('*yellow*'))
    _run_query(query.Or([red_values, yellow_names]),
               [u("A"), u("C"), u("D"), u("E")])

    # Missing
    no_match = query.Wildcard('value', 'glonk*')
    _run_query(no_match, [])

Example #35

0

Show file

def test_phrase_score():
    """A document holding the phrase twice must weigh more than one with it once.

    The matcher is stepped by hand: document 0 first, then document 3.
    """
    schema = fields.Schema(name=fields.ID(stored=True), value=fields.TEXT)
    ix = RamStorage().create_index(schema)

    docs = (
        ("A", "Little Miss Muffet sat on a tuffet"),
        ("D", "Gibberish blonk falunk miss muffet sat "
              "tuffet garbonzo"),
        ("E", "Blah blah blah pancakes"),
        ("F", "Little miss muffet little miss muffet"),
    )
    writer = ix.writer()
    for name, value in docs:
        writer.add_document(name=u(name), value=u(value))
    writer.commit()

    with ix.searcher() as s:
        q = query.Phrase("value", [u("little"), u("miss"), u("muffet")])
        m = q.matcher(s)

        # First match: document 0 ("A")
        assert_equal(m.id(), 0)
        first_weight = m.weight()
        assert first_weight > 0

        # Second match: document 3 ("F")
        m.next()
        assert_equal(m.id(), 3)
        assert m.weight() > first_weight

Example #36

0

Show file

def test_posting_phrase():
    """Phrase queries respect word order, slop and repeated words."""
    schema = fields.Schema(name=fields.ID(stored=True), value=fields.TEXT)
    ix = RamStorage().create_index(schema)

    docs = (
        ("A", "Little Miss Muffet sat on a tuffet"),
        ("B", "Miss Little Muffet tuffet"),
        ("C", "Miss Little Muffet tuffet sat"),
        ("D", "Gibberish blonk falunk miss muffet sat "
              "tuffet garbonzo"),
        ("E", "Blah blah blah pancakes"),
    )
    writer = ix.writer()
    for name, value in docs:
        writer.add_document(name=u(name), value=u(value))
    writer.commit()

    with ix.searcher() as s:
        def names(results):
            return sorted([hit['name'] for hit in results])

        def phrase(words, **kwargs):
            # Phrase over the "value" field built from plain word strings
            return query.Phrase("value", [u(word) for word in words], **kwargs)

        q = phrase(["little", "miss", "muffet", "sat", "tuffet"])
        m = q.matcher(s)
        assert_equal(m.__class__.__name__, "SpanNearMatcher")

        r = s.search(q)
        assert_equal(names(r), ["A"])
        assert_equal(len(r), 1)

        q = phrase(["miss", "muffet", "sat", "tuffet"])
        assert_equal(names(s.search(q)), ["A", "D"])

        # Words in the wrong order do not match...
        q = phrase(["falunk", "gibberish"])
        r = s.search(q)
        assert_equal(names(r), [])
        assert_equal(len(r), 0)

        # ...but the right order with slop=2 does.
        q = phrase(["gibberish", "falunk"], slop=2)
        assert_equal(names(s.search(q)), ["D"])

        q = phrase(["blah"] * 4)
        assert_equal(names(s.search(q)), [])  # blah blah blah blah

        q = phrase(["blah"] * 3)
        # A matcher is built here as well, but it is not inspected.
        q.matcher(s)
        assert_equal(names(s.search(q)), ["E"])

Example #37

0

Show file

def test_multireader():
    """Search for "bravo" before and after a second batch is committed.

    Two batches of nine documents are written; after the second commit the
    index is expected to report two segments, and the Term search must
    still return exactly the "bravo" document.
    """
    sc = fields.Schema(id=fields.ID(stored=True), content=fields.TEXT)
    ix = RamStorage().create_index(sc)

    def add_words(words):
        # One document per word; the word is both the id and the content.
        w = ix.writer()
        for word in words:
            w.add_document(id=u(word), content=u(word))
        w.commit()

    def check_bravo():
        with ix.searcher() as s:
            hits = s.search(query.Term("content", u("bravo")))
            assert_equal(len(hits), 1)
            assert_equal(hits[0]["id"], "bravo")

    add_words(["alfa", "bravo", "charlie", "delta", "echo", "foxtrot",
               "golf", "hotel", "india"])
    check_bravo()

    add_words(["juliet", "kilo", "lima", "mike", "november", "oscar",
               "papa", "quebec", "romeo"])
    assert_equal(len(ix._segments()), 2)

    check_bravo()

Example #38

0

Show file

def test_merged():
    """Search for "bravo" before and after a one-document second commit.

    After the second commit the index is expected to report a single
    segment, and the Term search must still find only "bravo".
    """
    sc = fields.Schema(id=fields.ID(stored=True), content=fields.TEXT)
    ix = RamStorage().create_index(sc)

    def add_words(words):
        # One document per word; the word is both the id and the content.
        w = ix.writer()
        for word in words:
            w.add_document(id=u(word), content=u(word))
        w.commit()

    def check_bravo():
        with ix.searcher() as s:
            hits = s.search(query.Term("content", u("bravo")))
            assert_equal(len(hits), 1)
            assert_equal(hits[0]["id"], "bravo")

    add_words(["alfa", "bravo", "charlie", "delta"])
    check_bravo()

    add_words(["echo"])
    assert_equal(len(ix._segments()), 1)

    check_bravo()

Example #39

0

Show file

def test_range():
    """Parse bracketed range expressions and check the resulting queries.

    For each parsed string the test checks the query classes, the
    ``start``/``end``/``startexcl``/``endexcl`` attributes of the
    ``TermRange`` node, and the sorted ids returned by a search.
    """
    schema = fields.Schema(id=fields.ID(stored=True), content=fields.TEXT)
    ix = RamStorage().create_index(schema)

    rows = [
        ("A", "alfa bravo charlie delta echo"),
        ("B", "bravo charlie delta echo foxtrot"),
        ("C", "charlie delta echo foxtrot golf"),
        ("D", "delta echo foxtrot golf hotel"),
        ("E", "echo foxtrot golf hotel india"),
    ]
    w = ix.writer()
    for doc_id, text in rows:
        w.add_document(id=u(doc_id), content=u(text))
    w.commit()

    with ix.searcher() as s:
        qp = qparser.QueryParser("content", schema)

        def check_bounds(rq, start, end, startexcl, endexcl):
            # rq must be a TermRange with the given limits and exclusivity.
            assert_equal(rq.__class__, query.TermRange)
            assert_equal(rq.start, start)
            assert_equal(rq.end, end)
            assert_equal(rq.startexcl, startexcl)
            assert_equal(rq.endexcl, endexcl)

        def check_ids(q, expected):
            found = sorted(hit["id"] for hit in s.search(q))
            assert_equal(found, expected)

        # Term AND inclusive range
        q = qp.parse(u("charlie [delta TO foxtrot]"))
        assert_equal(q.__class__, query.And)
        assert_equal(q[0].__class__, query.Term)
        check_bounds(q[1], "delta", "foxtrot", False, False)
        check_ids(q, [u("A"), u("B"), u("C")])

        # Term AND range with an exclusive start
        q = qp.parse(u("foxtrot {echo TO hotel]"))
        assert_equal(q.__class__, query.And)
        assert_equal(q[0].__class__, query.Term)
        check_bounds(q[1], "echo", "hotel", True, False)
        check_ids(q, [u("B"), u("C"), u("D"), u("E")])

        # Bare range, exclusive at both ends
        q = qp.parse(u("{bravo TO delta}"))
        check_bounds(q, "bravo", "delta", True, True)
        check_ids(q, [u("A"), u("B"), u("C")])

        # Shouldn't match anything
        q = qp.parse(u("[1 to 10]"))
        assert_equal(q.__class__, query.TermRange)
        assert_equal(len(s.search(q)), 0)

Example #40

0

Show file

def test_not2():
    """Check parsed ``NOT`` queries, including after ``delete_by_term``.

    Two "echo NOT ..." queries run against the full index; then every
    document containing "bravo" is deleted and a third query is checked.
    """
    schema = fields.Schema(name=fields.ID(stored=True), value=fields.TEXT)
    ix = RamStorage().create_index(schema)

    rows = [
        ("a", "alfa bravo charlie delta echo"),
        ("b", "bravo charlie delta echo foxtrot"),
        ("c", "charlie delta echo foxtrot golf"),
        ("d", "delta echo golf hotel india"),
        ("e", "echo golf hotel india juliet"),
    ]
    writer = ix.writer()
    for doc_name, text in rows:
        writer.add_document(name=u(doc_name), value=u(text))
    writer.commit()

    p = qparser.QueryParser("value", None)

    def matching_names(searcher, querystring):
        # Sorted stored names of the documents matching the parsed query.
        hits = searcher.search(p.parse(querystring))
        return sorted(hit["name"] for hit in hits)

    with ix.searcher() as s:
        assert_equal(matching_names(s, "echo NOT golf"), ["a", "b"])
        assert_equal(matching_names(s, "echo NOT bravo"), ["c", "d", "e"])

    ix.delete_by_term("value", u("bravo"))

    with ix.searcher() as s:
        assert_equal(matching_names(s, "echo NOT charlie"), ["d", "e"])

Example #41

0

Show file

def make_index():
    """Build and return a RamStorage index holding five fixed documents.

    Each document has a stored ``key`` (ID field) plus ``name`` and
    ``value`` TEXT fields.
    """
    schema = fields.Schema(key=fields.ID(stored=True),
                           name=fields.TEXT,
                           value=fields.TEXT)
    ix = RamStorage().create_index(schema)

    rows = (
        ("A", "Yellow brown", "Blue red green render purple?"),
        ("B", "Alpha beta", "Gamma delta epsilon omega."),
        ("C", "One two", "Three rendered four five."),
        ("D", "Quick went", "Every red town."),
        ("E", "Yellow uptown", "Interest rendering outer photo!"),
    )
    w = ix.writer()
    for key, name, value in rows:
        w.add_document(key=u(key), name=u(name), value=u(value))
    w.commit()

    return ix

Example #42

0

Show file

def test_collect_limit():
    """Check result length and iteration count when ``limit`` exceeds hits.

    The "golf" term search is run with a limit larger than the number of
    matching documents, first on one batch and then after a second batch
    committed with ``merge=False``.
    """
    schema = fields.Schema(id=fields.STORED, text=fields.TEXT)
    ix = RamStorage().create_index(schema)

    def add_batch(rows, **commit_args):
        w = ix.writer()
        for doc_id, text in rows:
            w.add_document(id=doc_id, text=u(text))
        w.commit(**commit_args)

    def check_golf(limit, expected):
        # len() and the number of iterated hits must both equal expected.
        with ix.searcher() as s:
            r = s.search(query.Term("text", u("golf")), limit=limit)
            assert_equal(len(r), expected)
            assert_equal(sum(1 for _ in r), expected)

    add_batch([
        ("a", "alfa bravo charlie delta echo"),
        ("b", "bravo charlie delta echo foxtrot"),
        ("c", "charlie delta echo foxtrot golf"),
        ("d", "delta echo foxtrot golf hotel"),
        ("e", "echo foxtrot golf hotel india"),
    ])
    check_golf(10, 3)

    add_batch([
        ("f", "foxtrot golf hotel india juliet"),
        ("g", "golf hotel india juliet kilo"),
        ("h", "hotel india juliet kilo lima"),
        ("i", "india juliet kilo lima mike"),
        ("j", "juliet kilo lima mike november"),
    ], merge=False)
    check_golf(20, 5)

Example #43

0

Show file

def test_filter():
    """Search with a ``filter`` query restricting hits by path prefix.

    Nine documents are written in three batches, each committed with
    ``merge=False``; term searches are then filtered to paths starting
    with "/a" or "/b".
    """
    schema = fields.Schema(id=fields.STORED, path=fields.ID, text=fields.TEXT)
    ix = RamStorage().create_index(schema)

    batches = (
        ((1, "/a/1", "alfa bravo charlie"),
         (2, "/b/1", "bravo charlie delta"),
         (3, "/c/1", "charlie delta echo")),
        ((4, "/a/2", "delta echo alfa"),
         (5, "/b/2", "echo alfa bravo"),
         (6, "/c/2", "alfa bravo charlie")),
        ((7, "/a/3", "bravo charlie delta"),
         (8, "/b/3", "charlie delta echo"),
         (9, "/c/3", "delta echo alfa")),
    )
    for batch in batches:
        w = ix.writer()
        for doc_id, path, text in batch:
            w.add_document(id=doc_id, path=u(path), text=u(text))
        w.commit(merge=False)

    with ix.searcher() as s:
        path_filter = query.Or([query.Prefix("path", "/a"),
                                query.Prefix("path", "/b")])

        def filtered_ids(word):
            hits = s.search(query.Term("text", word), filter=path_filter)
            return [hit["id"] for hit in hits]

        assert_equal(filtered_ids("alfa"), [1, 4, 5])
        assert_equal(filtered_ids("bravo"), [1, 2, 5, 7])

Example #44

0

Show file

def test_fieldboost():
    """Boost one field of an Or query via ``accept`` and check hit order.

    A visitor multiplies the boost of leaf queries on field "a" by 100;
    the test checks the query's text form and the order of the hit ids.
    """
    schema = fields.Schema(id=fields.STORED, a=fields.TEXT, b=fields.TEXT)
    ix = RamStorage().create_index(schema)

    rows = (
        ("alfa bravo charlie", "echo foxtrot india"),
        ("delta bravo charlie", "alfa alfa alfa"),
        ("alfa alfa alfa", "echo foxtrot india"),
        ("alfa sierra romeo", "alfa tango echo"),
        ("bravo charlie delta", "alfa foxtrot india"),
        ("alfa alfa echo", "tango tango tango"),
        ("alfa bravo echo", "alfa alfa tango"),
    )
    w = ix.writer()
    for doc_id, (a_text, b_text) in enumerate(rows):
        w.add_document(id=doc_id, a=u(a_text), b=u(b_text))
    w.commit()

    def field_booster(fieldname, factor=2.0):
        "Returns a function which will boost the given field in a query tree"
        def booster_fn(obj):
            # Anything that is not a leaf on the target field passes through.
            if not (obj.is_leaf() and obj.field() == fieldname):
                return obj
            boosted = copy.deepcopy(obj)
            boosted.boost *= factor
            return boosted
        return booster_fn

    with ix.searcher() as s:
        alfa_in_a = query.Term("a", u("alfa"))
        alfa_in_b = query.Term("b", u("alfa"))
        q = query.Or([alfa_in_a, alfa_in_b]).accept(field_booster("a", 100.0))
        assert_equal(text_type(q), text_type("(a:alfa^100.0 OR b:alfa)"))
        hit_ids = [hit["id"] for hit in s.search(q)]
        assert_equal(hit_ids, [2, 5, 6, 3, 0, 1, 4])

Example #45

0

Show file

def test_random_intersections():
    """Compare ``And`` matcher results with a manually computed intersection.

    Random documents built from a fixed word list are indexed over several
    writer commits. For a number of random word sets, the doc ids produced
    by an ``And`` matcher (via ``all_ids()`` and via ``id()``/``next()``)
    are compared with each other and with the expected document keys.

    NOTE(review): ``choice``, ``randint`` and ``sample`` are presumably the
    ``random`` module functions, and ``_keys`` presumably maps doc ids to
    stored "key" values -- neither is defined in this block; confirm.
    """
    domain = [
        u("alpha"),
        u("bravo"),
        u("charlie"),
        u("delta"),
        u("echo"),
        u("foxtrot"),
        u("golf"),
        u("hotel"),
        u("india"),
        u("juliet"),
        u("kilo"),
        u("lima"),
        u("mike")
    ]
    segments = 5  # number of writer/commit rounds
    docsperseg = 50  # documents added per round
    fieldlimits = (3, 10)  # (min, max) word count passed to randint
    documents = []

    schema = fields.Schema(key=fields.STORED, value=fields.TEXT(stored=True))
    st = RamStorage()
    ix = st.create_index(schema)

    # Create docsperseg * segments documents containing random words from
    # the domain list. Add the documents to the index, but also keep them
    # in the "documents" list for the sanity check
    for i in xrange(segments):
        w = ix.writer()
        for j in xrange(docsperseg):
            docnum = i * docsperseg + j
            # Create a string of random words
            doc = u(" ").join(
                choice(domain) for _ in xrange(randint(*fieldlimits)))
            # Add the string to the index
            w.add_document(key=docnum, value=doc)
            # Add a (docnum, string) tuple to the documents list
            documents.append((docnum, doc))
        w.commit()
    # The test is only meaningful if the index has more than one segment
    assert len(ix._segments()) != 1

    testcount = 20  # number of random queries to try
    testlimits = (2, 5)  # (min, max) number of words per query

    with ix.searcher() as s:
        # Every document must expose a stored "key" value
        for i in xrange(s.doc_count_all()):
            assert s.stored_fields(i).get("key") is not None

        for _ in xrange(testcount):
            # Create a random list of words and manually do an intersection of
            # items in "documents" that contain the words ("target").
            words = sample(domain, randint(*testlimits))
            target = []
            for docnum, doc in documents:
                # Substring test; works because no word in "domain" is a
                # substring of another. ("w" reuses the writer's name.)
                if all((doc.find(w) > -1) for w in words):
                    target.append(docnum)
            target.sort()

            # Create a query from the list of words and get two matchers from
            # it.
            q = And([Term("value", w) for w in words])
            m1 = q.matcher(s)
            m2 = q.matcher(s)

            # Try getting the list of IDs from all_ids()
            ids1 = list(m1.all_ids())

            # Try getting the list of IDs using id()/next()
            ids2 = []
            while m2.is_active():
                ids2.append(m2.id())
                m2.next()

            # Check that the two methods return the same list
            assert ids1 == ids2

            # Check that the IDs match the ones we manually calculated
            assert _keys(s, ids1) == target

Example #46

0

Show file

def test_andnot():
    """name:yellow AND NOT value:purple is expected to yield only "E"."""
    wanted = query.Term("name", u("yellow"))
    unwanted = query.Term("value", u("purple"))
    _run_query(query.AndNot(wanted, unwanted), [u("E")])

Example #47

0

Show file

def test_intersection():
    """Check ``And`` matcher ids for an index written in two commits.

    NOTE(review): ``_keys`` is defined elsewhere; it presumably maps the
    matcher's doc ids to stored "key" values -- confirm.
    """
    schema = fields.Schema(key=fields.ID(stored=True),
                           value=fields.TEXT(stored=True))
    ix = RamStorage().create_index(schema)

    batches = (
        (("a", "alpha bravo charlie delta"),
         ("b", "echo foxtrot alpha bravo"),
         ("c", "charlie delta golf hotel")),
        (("d", "india alpha bravo charlie"),
         ("e", "delta bravo india bravo")),
    )
    for batch in batches:
        w = ix.writer()
        for key, value in batch:
            w.add_document(key=u(key), value=u(value))
        w.commit()

    with ix.searcher() as s:
        def keys_matching(first, second):
            # Keys of documents whose "value" contains both words.
            both = And([Term("value", u(first)), Term("value", u(second))])
            return _keys(s, both.matcher(s).all_ids())

        assert keys_matching("bravo", "delta") == ["a", "e"]
        assert keys_matching("bravo", "alpha") == ["a", "b", "d"]

Example #48

0

Show file

def test_topnot():
    """Check top-level ``Not`` queries through ``_run_query``.

    Every term and expected key is wrapped in ``u()`` so the test passes
    unicode text throughout, matching the sibling ``_run_query`` tests
    (the original mixed ``u("B")`` with bare ``"C"``/``"E"`` literals).
    """
    _run_query(query.Not(query.Term("value", u("red"))),
               [u("B"), u("C"), u("E")])
    _run_query(query.Not(query.Term("name", u("yellow"))),
               [u("B"), u("C"), u("D")])

Example #49

0

Show file

def test_add_spelling():
    """Add word graphs to an existing index and check spelling suggestions.

    Neither field reports a word graph after the initial commit; after
    ``add_spelling`` both do, and ``ReaderCorrector`` suggestions are
    checked for each field.
    """
    schema = fields.Schema(text1=fields.TEXT, text2=fields.TEXT)
    ix = RamStorage().create_index(schema)

    rows = (
        ("render zorro kaori postal", "alfa"),
        ("reader zebra koala pastry", "alpa"),
        ("leader libra ooala paster", "alpha"),
        ("feeder lorry zoala baster", "olfo"),
    )
    w = ix.writer()
    for first, second in rows:
        w.add_document(text1=u(first), text2=u(second))
    w.commit()

    fieldnames = ["text1", "text2"]

    with ix.reader() as r:
        for fieldname in fieldnames:
            assert not r.has_word_graph(fieldname)

    from whoosh.filedb.filewriting import add_spelling
    add_spelling(ix, fieldnames)

    with ix.reader() as r:
        for fieldname in fieldnames:
            assert r.has_word_graph(fieldname)

        corrector = spelling.ReaderCorrector(r, "text1")
        assert_equal(corrector.suggest(u("kaola"), maxdist=1), [u("koala")])
        assert_equal(corrector.suggest(u("kaola"), maxdist=2),
                     [u("koala"), u("kaori"), u("ooala"), u("zoala")])

        corrector = spelling.ReaderCorrector(r, "text2")
        assert_equal(corrector.suggest(u("alfo"), maxdist=1),
                     [u("alfa"), u("olfo")])

Example #50

0

Show file

def test_require():
    """Require(value:red, name:yellow) is expected to yield only "A"."""
    red_value = query.Term("value", u("red"))
    yellow_name = query.Term("name", u("yellow"))
    _run_query(query.Require(red_value, yellow_name), [u("A")])

Example #51

0

Show file

File: test_flexible.py Project: datakortet/whoosh

def test_addfield():
    """Add a stored field to an existing index and check stored documents.

    Documents written before ``add_field`` are expected to lack the
    "added" key; documents written afterwards carry it.
    """
    schema = fields.Schema(id=fields.ID(stored=True), content=fields.TEXT)
    with TempIndex(schema, "addfield") as ix:
        w = ix.writer()
        for doc_id, text in (("a", "alfa"), ("b", "bravo"), ("c", "charlie")):
            w.add_document(id=u(doc_id), content=u(text))
        w.commit()

        ix.add_field("added", fields.KEYWORD(stored=True))

        later_rows = (("d", "delta", "fourth"), ("e", "echo", "fifth"))
        w = ix.writer()
        for doc_id, text, extra in later_rows:
            w.add_document(id=u(doc_id), content=u(text), added=u(extra))
        w.commit(merge=False)

        with ix.searcher() as s:
            assert ("id", "d") in s.reader()
            new_doc = s.document(id="d")
            assert_equal(new_doc, {"id": "d", "added": "fourth"})
            old_doc = s.document(id="b")
            assert_equal(old_doc, {"id": "b"})

Example #52

0

Show file

File: test_spelling.py Project: sangensong/whoosh-1

from __future__ import with_statement
import gzip

from whoosh import analysis, fields, highlight, query, spelling
from whoosh.compat import u
from whoosh.qparser import QueryParser
from whoosh.support.levenshtein import levenshtein
from whoosh.util.testing import TempIndex

# Sorted vocabulary used by the spelling-corrector tests.
# Each string fragment ends with a space: adjacent literals are concatenated
# with nothing in between, so without it the last word of one line fuses with
# the first word of the next (e.g. "koalaready", "reductionblunder").
_wordlist = sorted(
    u("render animation animate shader shading zebra koala "
      "ready kismet reaction page delete quick fox jumped "
      "over lazy dog wicked erase red team yellow under interest "
      "open print acrid sear deaf feed grow heal jolly kilt "
      "low zone xylophone crown vale brown neat meat reduction "
      "blunder preaction lamppost").split())


def test_list_corrector():
    """Compare ``ListCorrector`` suggestions with a brute-force expectation.

    The expected list is built from ``_wordlist`` in two passes: words
    within Levenshtein distance 1 of the typo first, then words within
    distance 2 that were not already collected, each pass in list order.
    """
    typo = "reoction"
    corrector = spelling.ListCorrector(_wordlist)
    suggestions = list(corrector.suggest(typo, maxdist=2))

    expected = []
    for max_dist in (1, 2):
        # Membership is checked against the list as it stood before this
        # pass, because the new words are appended only after the scan.
        newly_found = [
            word for word in _wordlist
            if word not in expected and levenshtein(typo, word) <= max_dist
        ]
        expected.extend(newly_found)

    assert suggestions == expected

Example #53

0

Show file

File: test_flexible.py Project: datakortet/whoosh

def test_removefield():
    """Remove two fields through a writer and check schema and documents.

    After the removal the current schema is expected to list only "id",
    a removed-field term is no longer in the reader, and stored documents
    keep only their id.
    """
    schema = fields.Schema(id=fields.ID(stored=True),
                           content=fields.TEXT,
                           city=fields.KEYWORD(stored=True))
    with TempIndex(schema, "removefield") as ix:
        rows = (
            ("b", "bravo", "baghdad"),
            ("c", "charlie", "cairo"),
            ("d", "delta", "dakar"),
        )
        w = ix.writer()
        for doc_id, text, city in rows:
            w.add_document(id=u(doc_id), content=u(text), city=u(city))
        w.commit()

        with ix.searcher() as s:
            stored = s.document(id=u("c"))
            assert_equal(stored, {"id": "c", "city": "cairo"})

        w = ix.writer()
        for fieldname in ("content", "city"):
            w.remove_field(fieldname)
        w.commit()

        current = ix._current_schema()
        assert_equal(current.names(), ["id"])
        assert_equal(current.stored_names(), ["id"])

        with ix.searcher() as s:
            assert ("content", u("charlie")) not in s.reader()
            stored = s.document(id=u("c"))
            assert_equal(stored, {"id": u("c")})

Example #54

0

Show file

def test_reader_corrector():
    """Check ``ReaderCorrector`` suggestions on a ``spelling=True`` field."""
    schema = fields.Schema(text=fields.TEXT(spelling=True))
    ix = RamStorage().create_index(schema)

    lines = (
        "render zorro kaori postal",
        "reader zebra koala pastry",
        "leader libra ooala paster",
        "feeder lorry zoala baster",
    )
    w = ix.writer()
    for line in lines:
        w.add_document(text=u(line))
    w.commit()

    with ix.reader() as r:
        assert r.has_word_graph("text")
        corrector = spelling.ReaderCorrector(r, "text")

        close = corrector.suggest(u("kaola"), maxdist=1)
        assert_equal(close, [u("koala")])

        wider = corrector.suggest(u("kaola"), maxdist=2)
        assert_equal(wider, [u("koala"), u("kaori"), u("ooala"), u("zoala")])

Example #55

0

Show file

File: test_reading.py Project: sangensong/whoosh-1

def _multi_segment_index():
    """Return an index from ``_create_index()`` filled by three commits.

    The first commit uses the writer's default arguments; the second and
    third pass ``merge=False``.
    """
    ix = _create_index()

    # (commit keyword arguments, rows of (f1, f2, f3)) for each writer round
    rounds = (
        ({}, (("A B C", "1 2 3", "X Y Z"),
              ("D E F", "4 5 6", "Q R S"))),
        ({"merge": False}, (("A E C", "1 4 6", "X Q S"),
                            ("A A A", "2 3 5", "Y R Z"))),
        ({"merge": False}, (("A B", "1 2", "X Y"),)),
    )
    for commit_args, rows in rounds:
        w = ix.writer()
        for f1, f2, f3 in rows:
            w.add_document(f1=u(f1), f2=u(f2), f3=u(f3))
        w.commit(**commit_args)

    return ix

Example #56

0

Show file

File: test_flexible.py Project: datakortet/whoosh

def test_optimize_away():
    """Remove two fields and commit with ``optimize=True``.

    Afterwards a term from the removed "content" field is expected to be
    absent from the reader and stored documents keep only their id.
    """
    schema = fields.Schema(id=fields.ID(stored=True),
                           content=fields.TEXT,
                           city=fields.KEYWORD(stored=True))
    with TempIndex(schema, "optimizeaway") as ix:
        rows = (
            ("b", "bravo", "baghdad"),
            ("c", "charlie", "cairo"),
            ("d", "delta", "dakar"),
        )
        w = ix.writer()
        for doc_id, text, city in rows:
            w.add_document(id=u(doc_id), content=u(text), city=u(city))
        w.commit()

        with ix.searcher() as s:
            stored = s.document(id=u("c"))
            assert_equal(stored, {"id": "c", "city": "cairo"})

        w = ix.writer()
        for fieldname in ("content", "city"):
            w.remove_field(fieldname)
        w.commit(optimize=True)

        with ix.searcher() as s:
            assert ("content", u("charlie")) not in s.reader()
            stored = s.document(id=u("c"))
            assert_equal(stored, {"id": u("c")})

Example #57

0

Show file

def test_short_prefix():
    """Parsing "s*" is expected to give a Prefix query whose text is "s"."""
    schema = fields.Schema(name=fields.ID, value=fields.TEXT)
    parser = qparser.QueryParser("value", schema=schema)
    parsed = parser.parse(u("s*"))
    assert_equal(parsed.__class__.__name__, "Prefix")
    assert_equal(parsed.text, "s")

Example #58

0

Show file

 def __unicode__(self):
     """Format this object as ``<fieldname>:<<text>>`` using ``u()``."""
     template = u("%s:<%s>")
     return template % (self.fieldname, self.text)

Example #59

0

Show file

def test_term():
    """Run single ``Term`` queries through ``_run_query``."""
    cases = (
        ("name", u("yellow"), [u("A"), u("E")]),
        ("value", u("zeta"), []),
        ("value", u("red"), [u("A"), u("D")]),
    )
    for fieldname, text, expected in cases:
        _run_query(query.Term(fieldname, text), expected)

Example #60

0

Show file

File: test_reading.py Project: sangensong/whoosh-1

def test_first_id():
    schema = fields.Schema(path=fields.ID(stored=True))
    ix = RamStorage().create_index(schema)

    w = ix.writer()
    w.add_document(path=u("/a"))
    w.add_document(path=u("/b"))
    w.add_document(path=u("/c"))
    w.commit()

    r = ix.reader()
    docid = r.first_id("path", u("/b"))
    assert r.stored_fields(docid) == {"path": "/b"}

    ix = RamStorage().create_index(schema)
    w = ix.writer()
    w.add_document(path=u("/a"))
    w.add_document(path=u("/b"))
    w.add_document(path=u("/c"))
    w.commit(merge=False)

    w = ix.writer()
    w.add_document(path=u("/d"))
    w.add_document(path=u("/e"))
    w.add_document(path=u("/f"))
    w.commit(merge=False)

    w = ix.writer()
    w.add_document(path=u("/g"))
    w.add_document(path=u("/h"))
    w.add_document(path=u("/i"))
    w.commit(merge=False)

    r = ix.reader()
    assert r.__class__ == reading.MultiReader
    docid = r.first_id("path", u("/e"))
    assert r.stored_fields(docid) == {"path": "/e"}

    with pytest.raises(NotImplementedError):
        r.cursor("path")