async def test_scorer(modclient: redis.Redis):
    """Check the top-hit score for the term ``quick`` under each scorer.

    Two documents are indexed on a single text field; the same query is then
    run with the default scorer and with every explicitly named scorer, and
    the score of the first returned document is compared to a fixed value.
    """
    await modclient.ft().create_index((TextField("description"),))

    descriptions = {
        "doc1": "The quick brown fox jumps over the lazy dog",
        "doc2": "Quick alice was beginning to get very tired of sitting by her quick sister on the bank, and of having nothing to do.",  # noqa
    }
    for doc_id, description in descriptions.items():
        await modclient.ft().add_document(doc_id, description=description)

    # No scorer given: the default (TFIDF) is used.
    res = await modclient.ft().search(Query("quick").with_scores())
    assert res.docs[0].score == 1.0

    # Explicit scorers, checked in this exact order.
    expected_scores = (
        ("TFIDF", 1.0),
        ("TFIDF.DOCNORM", 0.1111111111111111),
        ("BM25", 0.17699114465425977),
        ("DISMAX", 2.0),
        ("DOCSCORE", 1.0),
        ("HAMMING", 0.0),
    )
    for scorer_name, top_score in expected_scores:
        query = Query("quick").scorer(scorer_name).with_scores()
        res = await modclient.ft().search(query)
        assert res.docs[0].score == top_score
async def test_filters(modclient: redis.Redis):
    """Exercise numeric and geo filters on a small two-document index."""
    schema = (TextField("txt"), NumericField("num"), GeoField("loc"))
    await modclient.ft().create_index(schema)
    await modclient.ft().add_document(
        "doc1", txt="foo bar", num=3.141, loc="-0.441,51.458"
    )
    await modclient.ft().add_document("doc2", txt="foo baz", num=2, loc="-0.1,51.2")
    await waitForIndex(modclient, "idx")

    # Numeric filter: [0, 2] matches only doc2, (2, +inf) matches only doc1.
    up_to_two = NumericFilter("num", 0, 2)
    above_two = NumericFilter("num", 2, NumericFilter.INF, minExclusive=True)
    q1 = Query("foo").add_filter(up_to_two).no_content()
    q2 = Query("foo").add_filter(above_two).no_content()
    res1 = await modclient.ft().search(q1)
    res2 = await modclient.ft().search(q2)

    assert res1.total == 1
    assert res2.total == 1
    assert res1.docs[0].id == "doc2"
    assert res2.docs[0].id == "doc1"

    # Geo filter: radius 10 matches one document, radius 100 matches both.
    q1 = Query("foo").add_filter(GeoFilter("loc", -0.44, 51.45, 10)).no_content()
    q2 = Query("foo").add_filter(GeoFilter("loc", -0.44, 51.45, 100)).no_content()
    res1 = await modclient.ft().search(q1)
    res2 = await modclient.ft().search(q2)

    assert res1.total == 1
    assert res2.total == 2
    assert res1.docs[0].id == "doc1"

    # Sort results, after RDB reload order may change
    found_ids = sorted([res2.docs[0].id, res2.docs[1].id])
    assert found_ids == ["doc1", "doc2"]
async def test_no_create(modclient: redis.Redis):
    """Check ``no_create`` updates, with and without ``partial``.

    Both documents are first created with f1/f2 only, then updated with f3
    using ``no_create=True``; only doc2's update also passes ``partial=True``.
    Updating a document that was never created is expected to raise.
    """
    schema = (TextField("f1"), TextField("f2"), TextField("f3"))
    await modclient.ft().create_index(schema)

    for doc_id in ("doc1", "doc2"):
        await modclient.ft().add_document(doc_id, f1="f1_val", f2="f2_val")

    await modclient.ft().add_document("doc1", f3="f3_val", no_create=True)
    await modclient.ft().add_document("doc2", f3="f3_val", no_create=True, partial=True)
    await waitForIndex(modclient, "idx")

    # Search for f3 value. All documents should have it
    res = await modclient.ft().search("@f3:f3_val")
    assert res.total == 2

    # Only the document updated with PARTIAL should still have f1 and f2 values
    res = await modclient.ft().search("@f3:f3_val @f2:f2_val @f1:f1_val")
    assert res.total == 1

    # doc3 does not exist yet, so a no_create update must be rejected.
    with pytest.raises(redis.ResponseError):
        await modclient.ft().add_document(
            "doc3", f2="f2_val", f3="f3_val", no_create=True
        )
async def test_auto_complete(modclient: redis.Redis):
    """Exercise the suggestion (autocomplete) commands on the ``ac`` key.

    Loads (term, score) rows from ``TITLES_CSV`` into the suggestion
    dictionary, then checks prefix lookup with scores, fuzzy lookup, deletion
    (including that a repeated delete returns 0), and suggestions with
    payloads.
    """
    n = 0
    # newline="" is what the csv module asks for when handing it a file
    # object, so the reader (not the text layer) interprets line endings.
    with open(TITLES_CSV, newline="") as f:
        # sugadd is expected to return the running size of the dictionary,
        # which should equal the 1-based row number.
        for n, row in enumerate(csv.reader(f), start=1):
            term, score = row[0], float(row[1])
            assert n == await modclient.ft().sugadd("ac", Suggestion(term, score=score))

    assert n == await modclient.ft().suglen("ac")

    ret = await modclient.ft().sugget("ac", "bad", with_scores=True)
    assert 2 == len(ret)
    assert "badger" == ret[0].string
    assert isinstance(ret[0].score, float)
    assert 1.0 != ret[0].score
    assert "badalte rishtey" == ret[1].string
    assert isinstance(ret[1].score, float)
    assert 1.0 != ret[1].score

    ret = await modclient.ft().sugget("ac", "bad", fuzzy=True, num=10)
    assert 10 == len(ret)
    assert 1.0 == ret[0].score

    strs = {x.string for x in ret}

    for sug in strs:
        assert 1 == await modclient.ft().sugdel("ac", sug)
    # make sure a second delete returns 0
    for sug in strs:
        assert 0 == await modclient.ft().sugdel("ac", sug)

    # make sure they were actually deleted
    ret2 = await modclient.ft().sugget("ac", "bad", fuzzy=True, num=10)
    for sug in ret2:
        assert sug.string not in strs

    # Test with payload
    await modclient.ft().sugadd("ac", Suggestion("pay1", payload="pl1"))
    await modclient.ft().sugadd("ac", Suggestion("pay2", payload="pl2"))
    await modclient.ft().sugadd("ac", Suggestion("pay3", payload="pl3"))

    sugs = await modclient.ft().sugget(
        "ac", "pay", with_payloads=True, with_scores=True
    )
    assert 3 == len(sugs)
    for sug in sugs:
        assert sug.payload
        assert sug.payload.startswith("pl")
async def test_example(modclient: redis.Redis):
    """Run a minimal create-index / add-document / search round trip."""
    # Index definition: a weighted title field plus a body field.
    schema = (TextField("title", weight=5.0), TextField("body"))
    await modclient.ft().create_index(schema)

    # Index one document.
    await modclient.ft().add_document(
        "doc1",
        title="RediSearch",
        body="Redisearch impements a search engine on top of redis",
    )

    # Search using several query options at once.
    query = Query("search engine")
    query = query.verbatim().no_content().paging(0, 5)
    result = await modclient.ft().search(query)
    assert result is not None
async def test_replace(modclient: redis.Redis):
    """Check that ``replace=True`` swaps a document's indexed content."""
    await modclient.ft().create_index((TextField("txt"),))

    for doc_id in ("doc1", "doc2"):
        await modclient.ft().add_document(doc_id, txt="foo bar")
    await waitForIndex(modclient, "idx")

    # Both documents match before the replacement.
    res = await modclient.ft().search("foo bar")
    assert res.total == 2

    await modclient.ft().add_document(
        "doc1", replace=True, txt="this is a replaced doc"
    )

    # The old text now matches doc2 only ...
    res = await modclient.ft().search("foo bar")
    assert res.total == 1
    assert res.docs[0].id == "doc2"

    # ... and the new text matches doc1 only.
    res = await modclient.ft().search("replaced doc")
    assert res.total == 1
    assert res.docs[0].id == "doc1"
async def test_spell_check(modclient: redis.Redis):
    """Exercise spellcheck: plain, with a distance, and with include/exclude."""
    await modclient.ft().create_index((TextField("f1"), TextField("f2")))
    await modclient.ft().add_document(
        "doc1", f1="some valid content", f2="this is sample text"
    )
    await modclient.ft().add_document("doc2", f1="very important", f2="lorem ipsum")
    await waitForIndex(modclient, "idx")

    # test spellcheck
    for misspelled, corrected in (
        ("impornant", "important"),
        ("contnt", "content"),
    ):
        res = await modclient.ft().spellcheck(misspelled)
        assert res[misspelled][0]["suggestion"] == corrected

    # test spellcheck with Levenshtein distance
    res = await modclient.ft().spellcheck("vlis")
    assert res == {}
    res = await modclient.ft().spellcheck("vlis", distance=2)
    assert res["vlis"][0]["suggestion"] == "valid"

    # test spellcheck include
    await modclient.ft().dict_add("dict", "lore", "lorem", "lorm")
    res = await modclient.ft().spellcheck("lorm", include="dict")
    entries = res["lorm"]
    assert len(entries) == 3
    assert entries[0]["suggestion"] == "lorem"
    assert entries[1]["suggestion"] == "lore"
    assert entries[2]["suggestion"] == "lorm"
    # Only the first two scores are checked; they are compared as strings.
    assert entries[0]["score"] == "0.5"
    assert entries[1]["score"] == "0"

    # test spellcheck exclude
    res = await modclient.ft().spellcheck("lorm", exclude="dict")
    assert res == {}
async def test_summarize(modclient: redis.Redis):
    """Check highlighted and summarized output for the query ``king henry``."""
    await createIndex(modclient.ft())
    await waitForIndex(modclient, "idx")

    # The same summarized ``txt`` snippet is expected from both queries.
    expected_txt = "ACT I SCENE I. London. The palace. Enter <b>KING</b> <b>HENRY</b>, LORD JOHN OF LANCASTER, the EARL of WESTMORELAND, SIR... "  # noqa

    # Explicit highlight fields/tags, summarizing only ``txt``.
    q = (
        Query("king henry")
        .paging(0, 1)
        .highlight(fields=("play", "txt"), tags=("<b>", "</b>"))
        .summarize("txt")
    )
    result = await modclient.ft().search(q)
    doc = sorted(result.docs)[0]
    assert doc.play == "<b>Henry</b> IV"
    assert doc.txt == expected_txt

    # Default summarize/highlight arguments.
    q = Query("king henry").paging(0, 1).summarize().highlight()
    result = await modclient.ft().search(q)
    doc = sorted(result.docs)[0]
    assert doc.play == "<b>Henry</b> ... "
    assert doc.txt == expected_txt
async def test_sort_by(modclient: redis.Redis):
    """Check ascending and descending ``sort_by`` on a sortable numeric field."""
    schema = (TextField("txt"), NumericField("num", sortable=True))
    await modclient.ft().create_index(schema)

    rows = (("doc1", "foo bar", 1), ("doc2", "foo baz", 2), ("doc3", "foo qux", 3))
    for doc_id, txt, num in rows:
        await modclient.ft().add_document(doc_id, txt=txt, num=num)

    # Test sort
    ascending = Query("foo").sort_by("num", asc=True).no_content()
    descending = Query("foo").sort_by("num", asc=False).no_content()
    res1 = await modclient.ft().search(ascending)
    res2 = await modclient.ft().search(descending)

    assert res1.total == 3
    assert [res1.docs[i].id for i in range(3)] == ["doc1", "doc2", "doc3"]

    assert res2.total == 3
    assert [res2.docs[i].id for i in range(3)] == ["doc3", "doc2", "doc1"]
async def test_client(modclient: redis.Redis):
    """End-to-end check of the search client against a 500-document index.

    Covers index info, plain search, no-content search, verbatim matching,
    field-limited search, loading a single document, id-limited search,
    slop / in-order matching, and adding and deleting a document.
    """
    num_docs = 500
    await createIndex(modclient.ft(), num_docs=num_docs)
    await waitForIndex(modclient, "idx")

    # verify info
    info = await modclient.ft().info()
    required_info_keys = (
        "index_name",
        "index_options",
        "attributes",
        "num_docs",
        "max_doc_id",
        "num_terms",
        "num_records",
        "inverted_sz_mb",
        "offset_vectors_sz_mb",
        "doc_table_size_mb",
        "key_table_size_mb",
        "records_per_doc_avg",
        "bytes_per_record_avg",
        "offsets_per_term_avg",
        "offset_bits_per_record_avg",
    )
    for key in required_info_keys:
        assert key in info

    assert modclient.ft().index_name == info["index_name"]
    assert int(info["num_docs"]) == num_docs

    # Plain search: total hit count, default page size, and content loaded.
    res = await modclient.ft().search("henry iv")
    assert isinstance(res, Result)
    assert res.total == 225
    assert len(res.docs) == 10
    assert res.duration > 0

    for hit in res.docs:
        assert hit.id
        assert hit.play == "Henry IV"
        assert len(hit.txt) > 0

    # test no content
    res = await modclient.ft().search(Query("king").no_content())
    assert res.total == 194
    assert len(res.docs) == 10
    for hit in res.docs:
        assert "txt" not in hit.__dict__
        assert "play" not in hit.__dict__

    # test verbatim vs no verbatim
    stemmed = await modclient.ft().search(Query("kings").no_content())
    total = stemmed.total
    exact = await modclient.ft().search(Query("kings").no_content().verbatim())
    vtotal = exact.total
    assert total > vtotal

    # test in fields (all three searches run before any total is checked)
    field_totals = {}
    for fields in (("txt",), ("play",), ("play", "txt")):
        field_query = Query("henry").no_content().limit_fields(*fields)
        field_totals[fields] = (await modclient.ft().search(field_query)).total
    assert field_totals[("txt",)] == 129
    assert field_totals[("play",)] == 494
    assert field_totals[("play", "txt")] == 494

    # test load_document
    doc = await modclient.ft().load_document("henry vi part 3:62")
    assert doc is not None
    assert doc.id == "henry vi part 3:62"
    assert doc.play == "Henry VI Part 3"
    assert len(doc.txt) > 0

    # test in-keys
    first_page = await modclient.ft().search(Query("henry"))
    ids = [hit.id for hit in first_page.docs]
    assert len(ids) == 10
    subset = ids[:5]
    docs = await modclient.ft().search(Query("henry").limit_ids(*subset))
    assert docs.total == len(subset)
    ids = [hit.id for hit in docs.docs]
    assert set(ids) == set(subset)

    # test slop and in order
    slop_cases = (
        (193, Query("henry king")),
        (3, Query("henry king").slop(0).in_order()),
        (52, Query("king henry").slop(0).in_order()),
        (53, Query("henry king").slop(0)),
        (167, Query("henry king").slop(100)),
    )
    for expected_total, slop_query in slop_cases:
        assert (await modclient.ft().search(slop_query)).total == expected_total

    # test delete document
    await modclient.ft().add_document("doc-5ghs2", play="Death of a Salesman")
    res = await modclient.ft().search(Query("death of a salesman"))
    assert res.total == 1

    # First delete reports 1, the document is gone, a repeat delete reports 0.
    assert await modclient.ft().delete_document("doc-5ghs2") == 1
    res = await modclient.ft().search(Query("death of a salesman"))
    assert res.total == 0
    assert await modclient.ft().delete_document("doc-5ghs2") == 0

    # Re-adding the same id makes it searchable again; delete it afterwards.
    await modclient.ft().add_document("doc-5ghs2", play="Death of a Salesman")
    res = await modclient.ft().search(Query("death of a salesman"))
    assert res.total == 1
    await modclient.ft().delete_document("doc-5ghs2")
async def test_explain(modclient: redis.Redis):
    """Check that ``explain`` returns a truthy result for a three-field query."""
    schema = (TextField("f1"), TextField("f2"), TextField("f3"))
    await modclient.ft().create_index(schema)

    query_string = "@f3:f3_val @f2:f2_val @f1:f1_val"
    explanation = await modclient.ft().explain(query_string)
    assert explanation