def test_all_parents_deleted():
    """NestedParent must match nothing once every parent document is deleted."""
    schema = fields.Schema(kind=fields.ID,
                           name=fields.KEYWORD(scorable=True, stored=True))
    ix = RamStorage().create_index(schema)

    # Each group is one parent "class" document followed by its "method" children.
    groups = [
        ("Index", ["add document", "add reader", "close"]),
        ("Accumulator", ["add", "get result"]),
        ("Calculator", ["add", "add all", "add some", "multiply", "close"]),
        ("Deleter", ["add", "delete"]),
    ]
    with ix.writer() as w:
        for clsname, methods in groups:
            with w.group():
                w.add_document(kind=u("class"), name=u(clsname))
                for methname in methods:
                    w.add_document(kind=u("method"), name=u(methname))

    # Delete every parent ("class") document.
    with ix.writer() as w:
        for clsname, _ in groups:
            w.delete_by_term("name", clsname)

    with ix.searcher() as s:
        q = query.NestedParent(query.Term("kind", "class"),
                               query.Term("name", "add"))
        r = s.search(q)
        assert r.is_empty()
def setup(self):
    """
    Defers loading until needed.
    """
    from haystack import connections
    needs_new_index = False

    # Make sure the index directory exists (file storage only).
    if self.use_file_storage and not os.path.exists(self.path):
        os.makedirs(self.path)
        needs_new_index = True

    if self.use_file_storage and not os.access(self.path, os.W_OK):
        raise IOError("The path to your Whoosh index '%s' is not writable for the current user/group." % self.path)

    if self.use_file_storage:
        self.storage = FileStorage(self.path)
    else:
        global LOCALS

        # Lazily create one shared RAM store per thread.
        if getattr(LOCALS, 'RAM_STORE', None) is None:
            LOCALS.RAM_STORE = RamStorage()

        self.storage = LOCALS.RAM_STORE

    unified = connections[self.connection_alias].get_unified_index()
    self.content_field_name, self.schema = self.build_schema(unified.all_searchfields())
    self.parser = QueryParser(self.content_field_name, schema=self.schema)

    if needs_new_index is True:
        self.index = self.storage.create_index(self.schema)
    else:
        # Open the existing index; fall back to creating it if it's empty.
        try:
            self.index = self.storage.open_index(schema=self.schema)
        except index.EmptyIndexError:
            self.index = self.storage.create_index(self.schema)

    self.setup_complete = True
def test_page_counts():
    """Verify pagecount/pagenum arithmetic for several page sizes."""
    from whoosh.scoring import Frequency

    schema = fields.Schema(id=fields.ID(stored=True))
    st = RamStorage()
    ix = st.create_index(schema)

    w = ix.writer()
    for n in xrange(10):
        w.add_document(id=text_type(n))
    w.commit()

    with ix.searcher(weighting=Frequency) as s:
        q = query.Every("id")

        r = s.search(q)
        assert_equal(len(r), 10)

        # Page numbers are 1-based, so page 0 is invalid.
        assert_raises(ValueError, s.search_page, q, 0)

        # Asking for the same page twice must be stable.
        for _ in range(2):
            r = s.search_page(q, 1, 5)
            assert_equal(len(r), 10)
            assert_equal(r.pagecount, 2)

        r = s.search_page(q, 2, 5)
        assert_equal(len(r), 10)
        assert_equal(r.pagecount, 2)
        assert_equal(r.pagenum, 2)

        r = s.search_page(q, 1, 10)
        assert_equal(len(r), 10)
        assert_equal(r.pagecount, 1)
        assert_equal(r.pagenum, 1)
def test_resultspage():
    """Exercise ResultsPage slicing, page counts, and last-page detection."""
    schema = fields.Schema(id=fields.STORED, content=fields.TEXT(stored=True))
    ix = RamStorage().create_index(schema)

    domain = ("alfa", "bravo", "bravo", "charlie", "delta")
    writer = ix.writer()
    for num, words in enumerate(permutations(domain, 3)):
        writer.add_document(id=text_type(num), content=u(" ").join(words))
    writer.commit()

    with ix.searcher() as s:
        q = query.Term("content", u("bravo"))
        tops = list(s.search(q, limit=10))

        page = s.search_page(q, 1, pagelen=5)
        assert page.scored_length() == 5
        assert list(page) == tops[0:5]
        # Slicing past the end yields an empty list, not an error.
        assert page[10:] == []

        page = s.search_page(q, 2, pagelen=5)
        assert list(page) == tops[5:10]

        page = s.search_page(q, 1, pagelen=10)
        assert len(page) == 54
        assert page.pagecount == 6

        page = s.search_page(q, 6, pagelen=10)
        assert len(list(page)) == 4
        assert page.is_last_page()

        # Page numbers below 1 are rejected.
        with pytest.raises(ValueError):
            s.search_page(q, 0)

        # Requesting a page past the end clamps to the last page.
        assert s.search_page(q, 10).pagenum == 6

        empty = s.search_page(query.Term("content", "glonk"), 1)
        assert len(empty) == 0
        assert empty.is_last_page()
def test_snippets():
    """SentenceFragmenter + UppercaseFormatter should yield the expected snippets."""
    ana = analysis.StemmingAnalyzer()
    schema = fields.Schema(text=fields.TEXT(stored=True, analyzer=ana))
    ix = RamStorage().create_index(schema)

    docs = [
        "Lay out the rough animation by creating the important poses where they occur on the timeline.",
        "Set key frames on everything that's key-able. This is for control and predictability: you don't want to accidentally leave something un-keyed. This is also much faster than selecting the parameters to key.",
        "Use constant (straight) or sometimes linear transitions between keyframes in the channel editor. This makes the character jump between poses.",
        "Keying everything gives quick, immediate results. But it can become difficult to tweak the animation later, especially for complex characters.",
        "Copy the current pose to create the next one: pose the character, key everything, then copy the keyframe in the playbar to another frame, and key everything at that frame.",
    ]
    w = ix.writer()
    for text in docs:
        w.add_document(text=u(text))
    w.commit()

    # Expected best sentence fragment per matching document (order-insensitive).
    target = [
        "Set KEY frames on everything that's KEY-able",
        "Copy the current pose to create the next one: pose the character, KEY everything, then copy the keyframe in the playbar to another frame, and KEY everything at that frame",
        "KEYING everything gives quick, immediate results"
    ]

    with ix.searcher() as s:
        qp = qparser.QueryParser("text", ix.schema)
        q = qp.parse(u("key"))
        r = s.search(q, terms=True)
        r.fragmenter = highlight.SentenceFragmenter()
        r.formatter = highlight.UppercaseFormatter()
        snippets = [hit.highlights("text", top=1) for hit in r]
        assert_equal(sorted(snippets), sorted(target))
def test_compound_sort():
    """Sort on three keyword fields at once, with the middle key reversed."""
    fspec = fields.KEYWORD(stored=True, sortable=True)
    schema = fields.Schema(a=fspec, b=fspec, c=fspec)
    ix = RamStorage().create_index(schema)

    alist = u("alfa bravo alfa bravo alfa bravo alfa bravo alfa bravo").split()
    blist = u("alfa bravo charlie alfa bravo charlie alfa bravo charlie alfa").split()
    clist = u("alfa bravo charlie delta echo foxtrot golf hotel india juliet").split()
    assert all(len(ls) == 10 for ls in (alist, blist, clist))

    with ix.writer() as w:
        for aval, bval, cval in zip(alist, blist, clist):
            w.add_document(a=aval, b=bval, c=cval)

    with ix.searcher() as s:
        q = query.Every()
        sortedby = [sorting.FieldFacet("a"),
                    sorting.FieldFacet("b", reverse=True),
                    sorting.FieldFacet("c")]
        r = s.search(q, sortedby=sortedby)
        output = [" ".join((hit["a"], hit["b"], hit["c"])) for hit in r]
        assert output == [
            "alfa charlie charlie",
            "alfa charlie india",
            "alfa bravo echo",
            "alfa alfa alfa",
            "alfa alfa golf",
            "bravo charlie foxtrot",
            "bravo bravo bravo",
            "bravo bravo hotel",
            "bravo alfa delta",
            "bravo alfa juliet",
        ]
def test_translate():
    """TranslateFacet should sort results by the translated key value(s).

    NOTE: this was previously declared as ``class test_translate():``, which
    made the body execute at import time and kept pytest from collecting it
    as a test (lowercase class names are not collected). It is now a plain
    test function.
    """
    domain = [("alfa", 100, 50), ("bravo", 20, 80), ("charlie", 10, 10),
              ("delta", 82, 39), ("echo", 20, 73), ("foxtrot", 81, 59),
              ("golf", 39, 93), ("hotel", 57, 48), ("india", 84, 75),
              ]

    schema = fields.Schema(name=fields.TEXT(sortable=True),
                           a=fields.NUMERIC(sortable=True),
                           b=fields.NUMERIC(sortable=True))
    ix = RamStorage().create_index(schema)
    with ix.writer() as w:
        for name, a, b in domain:
            w.add_document(name=u(name), a=a, b=b)

    with ix.searcher() as s:
        q = query.Every()

        # Baseline: just sort by a field
        r = s.search(q, sortedby="a")
        assert " ".join([hit["name"] for hit in r]) == "charlie bravo echo golf hotel foxtrot delta india alfa"

        # Sort by reversed name
        target = [x[0] for x in sorted(domain, key=lambda x: x[0][::-1])]
        tf = sorting.TranslateFacet(lambda name: name[::-1], sorting.FieldFacet("name"))
        r = s.search(q, sortedby=tf)
        assert [hit["name"] for hit in r] == target

        # Sort by average of a and b
        def avg(a, b):
            return (a + b) / 2

        target = [x[0] for x in sorted(domain, key=lambda x: (x[1] + x[2]) / 2)]
        af = sorting.FieldFacet("a")
        bf = sorting.FieldFacet("b")
        tf = sorting.TranslateFacet(avg, af, bf)
        r = s.search(q, sortedby=tf)
        assert [hit["name"] for hit in r] == target
def test_terms():
    """matched_terms() should report exactly the terms that matched."""
    schema = fields.Schema(text=fields.TEXT(stored=True))
    ix = RamStorage().create_index(schema)
    w = ix.writer()
    for line in ("alfa sierra tango", "bravo charlie delta",
                 "charlie delta echo", "delta echo foxtrot"):
        w.add_document(text=u(line))
    w.commit()

    qp = qparser.QueryParser("text", ix.schema)
    q = qp.parse(u("(bravo AND charlie) OR foxtrot OR missing"))
    r = ix.searcher().search(q, terms=True)

    fieldobj = schema["text"]

    def txts(tset):
        # Decode (fieldname, bytes) pairs back to sorted text terms.
        return sorted(fieldobj.from_bytes(t[1]) for t in tset)

    assert txts(r.matched_terms()) == ["bravo", "charlie", "foxtrot"]

    # Every matched term must actually appear in the hit's stored text.
    for hit in r:
        value = hit["text"]
        for txt in txts(hit.matched_terms()):
            assert txt in value
def test_extend_empty():
    """Extending an empty Results with a non-empty one should adopt its hits."""
    schema = fields.Schema(id=fields.STORED, words=fields.KEYWORD)
    ix = RamStorage().create_index(schema)
    w = ix.writer()
    data = ["alfa bravo charlie", "bravo charlie delta", "charlie delta echo",
            "delta echo foxtrot", "echo foxtrot golf"]
    for docid, words in enumerate(data, 1):
        w.add_document(id=docid, words=u(words))
    w.commit()

    with ix.searcher() as s:
        # An empty results object ("hotel" matches nothing), copied.
        empty = s.search(query.Term("words", u("hotel"))).copy()
        # A non-empty results object, copied.
        full = s.search(query.Term("words", u("delta"))).copy()

        # Extend the empty copy with the full copy.
        empty.extend(full)
        assert [hit["id"] for hit in empty] == [2, 3, 4]
        assert empty.scored_length() == 3
def setup(self):
    """
    Defers loading until needed.
    """
    new_index = False

    # Make sure the index is there.
    if self.use_file_storage and not os.path.exists(settings.HAYSTACK_WHOOSH_PATH):
        os.makedirs(settings.HAYSTACK_WHOOSH_PATH)
        new_index = True

    if self.use_file_storage and not os.access(settings.HAYSTACK_WHOOSH_PATH, os.W_OK):
        raise IOError("The path to your Whoosh index '%s' is not writable for the current user/group." % settings.HAYSTACK_WHOOSH_PATH)

    if self.use_file_storage:
        self.storage = FileStorage(settings.HAYSTACK_WHOOSH_PATH)
    else:
        global LOCALS

        # A threading.local() starts with *no* attributes in each new thread,
        # so a bare ``LOCALS.RAM_STORE`` access would raise AttributeError the
        # first time through. getattr with a default handles that safely.
        if getattr(LOCALS, 'RAM_STORE', None) is None:
            LOCALS.RAM_STORE = RamStorage()

        self.storage = LOCALS.RAM_STORE

    self.content_field_name, self.schema = self.build_schema(self.site.all_searchfields())
    self.parser = QueryParser(self.content_field_name, schema=self.schema)

    if new_index is True:
        self.index = self.storage.create_index(self.schema)
    else:
        # Open the existing index; create one if it turns out to be empty.
        try:
            self.index = self.storage.open_index(schema=self.schema)
        except index.EmptyIndexError:
            self.index = self.storage.create_index(self.schema)

    self.setup_complete = True
def test_highlight_daterange():
    """Highlighting should work after both term and date-range searches."""
    from datetime import datetime

    schema = fields.Schema(id=fields.ID(unique=True, stored=True),
                           title=fields.TEXT(stored=True),
                           content=fields.TEXT(stored=True),
                           released=fields.DATETIME(stored=True))
    ix = RamStorage().create_index(schema)
    w = ix.writer()
    w.update_document(
        id=u('1'),
        title=u('Life Aquatic'),
        content=u('A nautic film crew sets out to kill a gigantic shark.'),
        released=datetime(2004, 12, 25)
    )
    w.update_document(
        id=u('2'),
        title=u('Darjeeling Limited'),
        content=u('Three brothers meet in India for a life changing train ' +
                  'journey.'),
        released=datetime(2007, 10, 27)
    )
    w.commit()

    # Use a context manager so the searcher is closed; the original version
    # opened it with ix.searcher() and never closed it.
    with ix.searcher() as s:
        r = s.search(Term('content', u('train')), terms=True)
        assert_equal(len(r), 1)
        assert_equal(r[0]["id"], "2")
        assert_equal(r[0].highlights("content"),
                     'for a life changing ' +
                     '<b class="match term0">train</b> journey')

        # A date-range hit has no matched text terms, so no highlights.
        r = s.search(DateRange('released', datetime(2007, 1, 1), None))
        assert_equal(len(r), 1)
        assert_equal(r[0].highlights("content"), '')
def make_index(self):
    """Build and return a small three-field RAM index used by the tests."""
    schema = fields.Schema(key=fields.ID(stored=True),
                           name=fields.TEXT,
                           value=fields.TEXT)
    ix = RamStorage().create_index(schema)

    rows = [
        (u"A", u"Yellow brown", u"Blue red green render purple?"),
        (u"B", u"Alpha beta", u"Gamma delta epsilon omega."),
        (u"C", u"One two", u"Three rendered four five."),
        (u"D", u"Quick went", u"Every red town."),
        (u"E", u"Yellow uptown", u"Interest rendering outer photo!"),
    ]
    w = ix.writer()
    for key, name, value in rows:
        w.add_document(key=key, name=name, value=value)
    w.commit()
    return ix
def test_persistent_cache():
    """A field cache built once should persist and reload lazily from storage."""
    schema = fields.Schema(id=fields.ID(stored=True))
    st = RamStorage()
    ix = st.create_index(schema)
    with ix.writer() as w:
        for term in u("charlie alfa echo bravo delta").split():
            w.add_document(id=term)

    # Build the cache once so it gets written back to storage.
    ix = st.open_index()
    with ix.reader() as r:
        _ = r.fieldcache("id")
        del _

    gc.collect()

    # Reopen: the cache exists in storage but is not loaded until requested.
    ix = st.open_index()
    with ix.reader() as r:
        assert r.fieldcache_available("id")
        assert not r.fieldcache_loaded("id")
        fc = r.fieldcache("id")
        assert r.fieldcache_loaded("id")
        assert_equal(list(fc.order), [3, 1, 5, 2, 4])
        assert_equal(list(fc.texts),
                     [u('\uffff'), 'alfa', 'bravo', 'charlie', 'delta', 'echo'])
def test_pages_with_filter():
    """search_page() must honor a filter query when paginating."""
    from whoosh.scoring import Frequency

    schema = fields.Schema(id=fields.ID(stored=True), type=fields.TEXT(),
                           c=fields.TEXT)
    ix = RamStorage().create_index(schema)

    docs = [("1", "odd", "alfa alfa alfa alfa alfa alfa"),
            ("2", "even", "alfa alfa alfa alfa alfa"),
            ("3", "odd", "alfa alfa alfa alfa"),
            ("4", "even", "alfa alfa alfa"),
            ("5", "odd", "alfa alfa"),
            ("6", "even", "alfa")]
    w = ix.writer()
    for docid, kind, text in docs:
        w.add_document(id=u(docid), type=u(kind), c=u(text))
    w.commit()

    with ix.searcher(weighting=Frequency) as s:
        q = query.Term("c", u("alfa"))
        filterq = query.Term("type", u("even"))

        # Only "even" docs survive the filter, ranked by term frequency.
        r = s.search(q, filter=filterq)
        assert [d["id"] for d in r] == ["2", "4", "6"]

        # The second page of two-at-a-time holds just the leftover doc.
        r = s.search_page(q, 2, pagelen=2, filter=filterq)
        assert [d["id"] for d in r] == ["6"]
def test_missing_field_scoring(self):
    """Field lengths must stay correct when a document omits a field."""
    schema = fields.Schema(name=fields.TEXT(stored=True),
                           hobbies=fields.TEXT(stored=True))
    idx = RamStorage().create_index(schema)

    w = idx.writer()
    w.add_document(name=u'Frank', hobbies=u'baseball, basketball')
    w.commit()
    self.assertEqual(idx.segments[0].field_length(0), 2)  # hobbies
    self.assertEqual(idx.segments[0].field_length(1), 1)  # name

    # The second document has no "hobbies" value at all.
    w = idx.writer()
    w.add_document(name=u'Jonny')
    w.commit()
    self.assertEqual(len(idx.segments), 1)
    self.assertEqual(idx.segments[0].field_length(0), 2)  # hobbies
    self.assertEqual(idx.segments[0].field_length(1), 2)  # name

    parser = qparser.MultifieldParser(['name', 'hobbies'], schema=schema)
    searcher = Searcher(idx.reader())
    result = searcher.search(parser.parse(u"baseball"))
    self.assertEqual(len(result), 1)
def test_phrase_score(self):
    """A doc repeating the phrase should outscore a doc containing it once."""
    schema = fields.Schema(name=fields.ID(stored=True), value=fields.TEXT)
    storage = RamStorage()
    ix = storage.create_index(schema)
    writer = ix.writer()
    writer.add_document(name=u"A", value=u"Little Miss Muffet sat on a tuffet")
    writer.add_document(
        name=u"D",
        value=u"Gibberish blonk falunk miss muffet sat tuffet garbonzo")
    writer.add_document(name=u"E", value=u"Blah blah blah pancakes")
    writer.add_document(name=u"F", value=u"Little miss muffet little miss muffet")
    writer.commit()

    searcher = ix.searcher()
    q = query.Phrase("value", [u"little", u"miss", u"muffet"])
    sc = q.scorer(searcher)
    self.assertEqual(sc.id, 0)
    score1 = sc.score()
    # assertTrue replaces the deprecated unittest alias ``assert_``.
    self.assertTrue(score1 > 0)
    sc.next()
    # Doc 3 ("F") contains the phrase twice, so it must score higher.
    self.assertEqual(sc.id, 3)
    self.assertTrue(sc.score() > score1)
def test_lengths_ram():
    """doc_field_length / field_length / max_field_length bookkeeping."""
    schema = fields.Schema(f1=fields.KEYWORD(stored=True, scorable=True),
                           f2=fields.KEYWORD(stored=True, scorable=True))
    st = RamStorage()
    ix = st.create_index(schema)
    w = ix.writer()
    w.add_document(f1=u("A B C D E"), f2=u("X Y Z"))
    w.add_document(f1=u("B B B B C D D Q"), f2=u("Q R S T"))
    w.add_document(f1=u("D E F"), f2=u("U V A B C D E"))
    w.commit()

    dr = ix.reader()
    assert_equal(dr.stored_fields(0)["f1"], "A B C D E")

    # Per-document term counts for each field.
    for docnum, (len1, len2) in enumerate([(5, 3), (8, 4), (3, 7)]):
        assert_equal(dr.doc_field_length(docnum, "f1"), len1)
        assert_equal(dr.doc_field_length(docnum, "f2"), len2)

    # Totals and maxima across the whole field.
    assert_equal(dr.field_length("f1"), 16)
    assert_equal(dr.field_length("f2"), 14)
    assert_equal(dr.max_field_length("f1"), 8)
    assert_equal(dr.max_field_length("f2"), 7)
def test_outofdate():
    """up_to_date()/refresh() must track commits made after the searcher opened."""
    schema = fields.Schema(id=fields.ID(stored=True))
    st = RamStorage()
    ix = st.create_index(schema)

    w = ix.writer()
    w.add_document(id=u("1"))
    w.add_document(id=u("2"))
    w.commit()

    s = ix.searcher()
    assert s.up_to_date()

    w = ix.writer()
    w.add_document(id=u("3"))
    w.add_document(id=u("4"))

    # Nothing has been committed yet, so the searcher is still current.
    assert s.up_to_date()
    w.commit()
    assert not s.up_to_date()

    # refresh() returns a searcher over the latest committed state.
    s = s.refresh()
    assert s.up_to_date()
    s.close()
def test_reverse_collapse():
    """Collapse on a facet while ordering the collapse with a reversed facet."""
    from whoosh import sorting

    schema = fields.Schema(title=fields.TEXT(stored=True),
                           content=fields.TEXT,
                           path=fields.ID(stored=True),
                           tags=fields.KEYWORD,
                           order=fields.NUMERIC(stored=True))
    ix = RamStorage().create_index(schema)
    with ix.writer() as w:
        w.add_document(title=u"First document", content=u"This is my document!",
                       path=u"/a", tags=u"first", order=20.0)
        w.add_document(title=u"Second document",
                       content=u"This is the second example.",
                       path=u"/b", tags=u"second", order=12.0)
        w.add_document(title=u"Third document", content=u"Examples are many.",
                       path=u"/c", tags=u"third", order=15.0)
        w.add_document(title=u"Thirdish document",
                       content=u"Examples are too many.",
                       path=u"/d", tags=u"third", order=25.0)

    with ix.searcher() as s:
        # Sanity check: all four documents are present.
        everything = s.search(query.Every('content'))
        assert [hit["path"] for hit in everything] == ["/a", "/b", "/c", "/d"]

        q = query.Or([query.Term("title", "document"),
                      query.Term("content", "document"),
                      query.Term("tags", "document")])
        cf = sorting.FieldFacet("tags")
        of = sorting.FieldFacet("order", reverse=True)
        r = s.search(q, collapse=cf, collapse_order=of, terms=True)
        # Of the two "third"-tagged docs, the higher "order" value (/d) wins.
        assert [hit["path"] for hit in r] == ["/a", "/b", "/d"]
def test_lengths_ram():
    """Reader length statistics for two scorable keyword fields."""
    schema = fields.Schema(f1=fields.KEYWORD(stored=True, scorable=True),
                           f2=fields.KEYWORD(stored=True, scorable=True))
    st = RamStorage()
    ix = st.create_index(schema)
    w = ix.writer()
    w.add_document(f1=u("A B C D E"), f2=u("X Y Z"))
    w.add_document(f1=u("B B B B C D D Q"), f2=u("Q R S T"))
    w.add_document(f1=u("D E F"), f2=u("U V A B C D E"))
    w.commit()

    dr = ix.reader()
    assert dr.stored_fields(0)["f1"] == "A B C D E"

    # Expected per-document lengths; totals/maxima follow from them.
    expected = {"f1": [5, 8, 3], "f2": [3, 4, 7]}
    for fname in ("f1", "f2"):
        lengths = expected[fname]
        for docnum, length in enumerate(lengths):
            assert dr.doc_field_length(docnum, fname) == length
        assert dr.field_length(fname) == sum(lengths)
        assert dr.max_field_length(fname) == max(lengths)
def test_daterange_facet():
    """Bucket docs into 5-day DateRangeFacet groups; dateless docs go under None."""
    schema = fields.Schema(id=fields.STORED, date=fields.DATETIME)
    ix = RamStorage().create_index(schema)
    w = ix.writer()
    w.add_document(id=0, date=datetime(2001, 1, 15))
    w.add_document(id=1, date=datetime(2001, 1, 10))
    w.add_document(id=2)  # deliberately no date
    w.add_document(id=3, date=datetime(2001, 1, 3))
    w.add_document(id=4, date=datetime(2001, 1, 8))
    w.add_document(id=5, date=datetime(2001, 1, 6))
    w.commit()

    with ix.searcher() as s:
        rf = sorting.DateRangeFacet("date", datetime(2001, 1, 1),
                                    datetime(2001, 1, 20), timedelta(days=5))
        r = s.search(query.Every(), groupedby={"date": rf})
        dt = datetime
        assert_equal(r.groups("date"),
                     {(dt(2001, 1, 1, 0, 0), dt(2001, 1, 6, 0, 0)): [3],
                      (dt(2001, 1, 6, 0, 0), dt(2001, 1, 11, 0, 0)): [1, 4, 5],
                      (dt(2001, 1, 11, 0, 0), dt(2001, 1, 16, 0, 0)): [0],
                      None: [2]})
def test_decimal_numeric():
    """NUMERIC fields with decimal_places should round-trip Decimal queries."""
    from decimal import Decimal

    f = fields.NUMERIC(int, decimal_places=4)
    schema = fields.Schema(id=fields.ID(stored=True), deci=f)
    ix = RamStorage().create_index(schema)

    # to_text/from_text must be lossless at this precision.
    assert_equal(f.from_text(f.to_text(Decimal("123.56"))), Decimal("123.56"))

    w = ix.writer()
    for docid, value in (("a", "123.56"), ("b", "0.536255"),
                         ("c", "2.5255"), ("d", "58")):
        w.add_document(id=u(docid), deci=Decimal(value))
    w.commit()

    with ix.searcher() as s:
        qp = qparser.QueryParser("deci", schema)

        r = s.search(qp.parse("123.56"))
        assert_equal(r[0]["id"], "a")

        r = s.search(qp.parse("0.536255"))
        assert_equal(r[0]["id"], "b")
def test_creation():
    """A multi-type schema should accept documents supplying any subset of fields."""
    schema = fields.Schema(content=fields.TEXT(phrase=True),
                           title=fields.TEXT(stored=True),
                           path=fields.ID(stored=True),
                           tags=fields.KEYWORD(stored=True),
                           quick=fields.NGRAM,
                           note=fields.STORED)
    st = RamStorage()
    ix = st.create_index(schema)

    w = ix.writer()
    w.add_document(title=u("First"), content=u("This is the first document"),
                   path=u("/a"), tags=u("first second third"),
                   quick=u("First document"),
                   note=u("This is the first document"))
    # Keyword order differs deliberately from the schema declaration.
    w.add_document(content=u("Let's try this again"), title=u("Second"),
                   path=u("/b"), tags=u("Uno Dos Tres"),
                   quick=u("Second document"),
                   note=u("This is the second document"))
    w.commit()
def test_phrase_score():
    """A doc containing the phrase twice should outweigh one containing it once."""
    schema = fields.Schema(name=fields.ID(stored=True), value=fields.TEXT)
    ix = RamStorage().create_index(schema)
    writer = ix.writer()
    writer.add_document(name=u("A"),
                        value=u("Little Miss Muffet sat on a tuffet"))
    writer.add_document(name=u("D"),
                        value=u("Gibberish blonk falunk miss muffet sat " +
                                "tuffet garbonzo"))
    writer.add_document(name=u("E"), value=u("Blah blah blah pancakes"))
    writer.add_document(name=u("F"),
                        value=u("Little miss muffet little miss muffet"))
    writer.commit()

    with ix.searcher() as s:
        q = query.Phrase("value", [u("little"), u("miss"), u("muffet")])
        m = q.matcher(s)
        assert_equal(m.id(), 0)
        first_weight = m.weight()
        assert first_weight > 0

        m.next()
        # Doc 3 ("F") repeats the phrase, so it must weigh more.
        assert_equal(m.id(), 3)
        assert m.weight() > first_weight
def test_all():
    """Smoke-test every weighting class against a small random index.

    Any exception raised during instantiation or searching is re-raised with
    the offending weighting class named in the message.
    """
    domain = [u("alfa"), u("bravo"), u("charlie"), u("delta"), u("echo"),
              u("foxtrot")]
    schema = fields.Schema(text=fields.TEXT)
    storage = RamStorage()
    ix = storage.create_index(schema)
    w = ix.writer()
    for _ in xrange(100):
        w.add_document(text=u(" ").join(choice(domain)
                                        for _ in xrange(randint(10, 20))))
    w.commit()

    # List ABCs that should not be tested
    abcs = ()

    # provide initializer arguments for any weighting classes that require them
    init_args = {"MultiWeighting": ([scoring.BM25F()],
                                    {"text": scoring.Frequency()}),
                 "ReverseWeighting": ([scoring.BM25F()], {})}

    for wclass in _weighting_classes(abcs):
        try:
            if wclass.__name__ in init_args:
                args, kwargs = init_args[wclass.__name__]
                weighting = wclass(*args, **kwargs)
            else:
                weighting = wclass()
        except TypeError:
            e = sys.exc_info()[1]
            raise TypeError("Error instantiating %r: %s" % (wclass, e))

        with ix.searcher(weighting=weighting) as s:
            try:
                for word in domain:
                    s.search(query.Term("text", word))
            except Exception:
                e = sys.exc_info()[1]
                # Fold the weighting class into the exception's displayed
                # message. The previous code assigned to a bare ``e.msg``
                # attribute, which standard exceptions never show, so the
                # annotation was silently lost.
                e.args = ("Error searching with %r: %s" % (wclass, e),)
                raise
def test_deleted_wildcard():
    """Every() must skip documents deleted in a later commit."""
    schema = fields.Schema(id=fields.ID(stored=True))
    st = RamStorage()
    ix = st.create_index(schema)

    w = ix.writer()
    for name in ("alfa", "bravo", "charlie", "delta", "echo", "foxtrot"):
        w.add_document(id=u(name))
    w.commit()

    # Delete every other document in a second commit.
    w = ix.writer()
    for name in ("bravo", "delta", "echo"):
        w.delete_by_term("id", name)
    w.commit()

    with ix.searcher() as s:
        r = s.search(query.Every("id"))
        assert_equal(sorted([d['id'] for d in r]),
                     ["alfa", "charlie", "foxtrot"])
def test_workflow_easy():
    """End-to-end: parse a user query, search with terms, highlight whole titles."""
    schema = fields.Schema(id=fields.ID(stored=True),
                           title=fields.TEXT(stored=True))
    ix = RamStorage().create_index(schema)

    docs = [(u("1"), u("The man who wasn't there")),
            (u("2"), u("The dog who barked at midnight")),
            (u("3"), u("The invisible man")),
            (u("4"), u("The girl with the dragon tattoo")),
            (u("5"), u("The woman who disappeared"))]
    w = ix.writer()
    for docid, title in docs:
        w.add_document(id=docid, title=title)
    w.commit()

    with ix.searcher() as s:
        # Parse the user query
        parser = qparser.QueryParser("title", schema=ix.schema)
        q = parser.parse(u("man"))
        r = s.search(q, terms=True)
        assert len(r) == 2

        r.fragmenter = highlight.WholeFragmenter()
        r.formatter = highlight.UppercaseFormatter()
        outputs = [hit.highlights("title") for hit in r]
        assert outputs == ["The invisible MAN", "The MAN who wasn't there"]
def test_ordered():
    """Ordered query: the terms must appear in the given order in every hit."""
    domain = u("alfa bravo charlie delta echo foxtrot").split(" ")
    schema = fields.Schema(f=fields.TEXT(stored=True))
    ix = RamStorage().create_index(schema)
    writer = ix.writer()
    for perm in permutations(domain):
        writer.add_document(f=u(" ").join(perm))
    writer.commit()

    with ix.searcher() as s:
        q = query.Ordered([query.Term("f", u("alfa")),
                           query.Term("f", u("charlie")),
                           query.Term("f", u("echo"))])
        for hit in s.search(q):
            words = hit["f"].split()
            # All three terms must be present...
            for term in ("alfa", "charlie", "echo"):
                assert term in words
            # ...and in strictly increasing position.
            a, c, e = (words.index(t) for t in ("alfa", "charlie", "echo"))
            assert a < c and c < e, repr(words)
def test_query_facet():
    """Group results into buckets defined by arbitrary queries via QueryFacet."""
    schema = fields.Schema(id=fields.STORED, v=fields.ID)
    ix = RamStorage().create_index(schema)
    # One commit (and thus one segment, thanks to merge=False) per letter.
    for docnum, letter in enumerate(u("iacgbehdf")):
        w = ix.writer()
        w.add_document(id=docnum, v=letter)
        w.commit(merge=False)

    with ix.searcher() as s:
        q1 = query.TermRange("v", "a", "c")
        q2 = query.TermRange("v", "d", "f")
        q3 = query.TermRange("v", "g", "i")

        assert [hit["id"] for hit in s.search(q1)] == [1, 2, 4]
        assert [hit["id"] for hit in s.search(q2)] == [5, 7, 8]
        assert [hit["id"] for hit in s.search(q3)] == [0, 3, 6]

        facet = sorting.QueryFacet({"a-c": q1, "d-f": q2, "g-i": q3})
        r = s.search(query.Every(), groupedby=facet)
        # If you specify a facet without a name, it's automatically called
        # "facet"
        assert r.groups("facet") == {"a-c": [1, 2, 4],
                                     "d-f": [5, 7, 8],
                                     "g-i": [0, 3, 6]}
def test_stored_fields():
    """stored_fields()/document() must return only the stored fields of a doc."""
    schema = fields.Schema(a=fields.ID(stored=True), b=fields.STORED,
                           c=fields.KEYWORD, d=fields.TEXT(stored=True))
    st = RamStorage()
    ix = st.create_index(schema)

    writer = ix.writer()
    writer.add_document(a=u("1"), b="a", c=u("zulu"), d=u("Alfa"))
    writer.add_document(a=u("2"), b="b", c=u("yankee"), d=u("Bravo"))
    writer.add_document(a=u("3"), b="c", c=u("xray"), d=u("Charlie"))
    writer.commit()

    with ix.searcher() as sr:
        # "c" is indexed but not stored, so it never shows up.
        assert_equal(sr.stored_fields(0),
                     {"a": u("1"), "b": "a", "d": u("Alfa")})
        assert_equal(sr.stored_fields(2),
                     {"a": u("3"), "b": "c", "d": u("Charlie")})

        # document() looks a doc up by a unique stored field value.
        assert_equal(sr.document(a=u("1")),
                     {"a": u("1"), "b": "a", "d": u("Alfa")})
        assert_equal(sr.document(a=u("2")),
                     {"a": u("2"), "b": "b", "d": u("Bravo")})