def test_json_with_jsonpath(client):
    """Index a JSON key containing ':' through two JSONPath spellings.

    The bracket-notation path is aliased as ``name`` and the dot-notation
    path as ``name_unsupported``. The assertions expect the first alias to
    be searchable and returnable, and the second to match nothing and to be
    missing from returned documents.
    """
    json_definition = IndexDefinition(index_type=IndexType.JSON)
    schema = (
        TextField('$["prod:name"]', as_name="name"),
        TextField("$.prod:name", as_name="name_unsupported"),
    )
    client.ft().create_index(schema, definition=json_definition)

    client.json().set("doc:1", Path.rootPath(), {"prod:name": "RediSearch"})

    # Searching through the bracket-notation alias finds the document.
    supported = client.ft().search(Query("@name:RediSearch"))
    assert supported.total == 1
    assert supported.docs[0].id == "doc:1"
    assert supported.docs[0].json == '{"prod:name":"RediSearch"}'

    # Searching through the dot-notation alias finds nothing.
    unsupported = client.ft().search("@name_unsupported:RediSearch")
    assert unsupported.total == 0

    # Returning the bracket-notation alias yields the stored value.
    returned = client.ft().search(
        Query("@name:RediSearch").return_field("name")
    )
    assert returned.total == 1
    assert returned.docs[0].id == "doc:1"
    assert returned.docs[0].name == "RediSearch"

    # Returning the dot-notation alias still matches the document, but the
    # attribute is not present on the result.
    missing = client.ft().search(
        Query("@name:RediSearch").return_field("name_unsupported")
    )
    assert missing.total == 1
    assert missing.docs[0].id == "doc:1"
    with pytest.raises(Exception):
        missing.docs[0].name_unsupported
def test_alias_basic():
    """Exercise alias add, duplicate-add failure, update and delete."""
    getClient().flushdb()

    # Two indexes, each with a single document added through it.
    first_index = getClient().ft("testAlias")
    first_index.create_index((TextField("txt"),))
    first_index.add_document("doc1", txt="text goes here")

    second_index = getClient().ft("testAlias2")
    second_index.create_index((TextField("txt"),))
    second_index.add_document("doc2", txt="text goes here")

    # Point the alias at the first index and query through it.
    first_index.aliasadd("myalias")
    via_alias = getClient().ft("myalias")
    ordered = sorted(via_alias.search("*").docs, key=lambda doc: doc.id)
    assert ordered[0].id == "doc1"

    # Adding an alias that already exists must fail.
    with pytest.raises(Exception):
        second_index.aliasadd("myalias")

    # Re-point the alias at the second index and query again.
    # NOTE(review): the original comment said "ensure we get doc2", yet the
    # assertion checks for "doc1" after sorting by id - confirm the intent.
    second_index.aliasupdate("myalias")
    via_updated_alias = getClient().ft("myalias")
    ordered = sorted(
        via_updated_alias.search("*").docs, key=lambda doc: doc.id
    )
    assert ordered[0].id == "doc1"

    # Once the alias is deleted, querying through it must raise.
    second_index.aliasdel("myalias")
    with pytest.raises(Exception):
        _ = via_updated_alias.search("*").docs[0]
def test_aggregations_sort_by_and_limit(client):
    """Check aggregate SORTBY (with/without direction, with max) and LIMIT."""
    schema = (
        TextField("t1"),
        TextField("t2"),
    )
    client.ft().create_index(schema)

    client.ft().client.hset("doc1", mapping={"t1": "a", "t2": "b"})
    client.ft().client.hset("doc2", mapping={"t1": "b", "t2": "a"})

    # Explicit sort directions: ascending on t2, descending on t1.
    directed = aggregations.AggregateRequest("*").sort_by(
        aggregations.Asc("@t2"), aggregations.Desc("@t1")
    )
    result = client.ft().aggregate(directed)
    assert res_row(result, 0) == ["t2", "a", "t1", "b"] if False else (
        result.rows[0] == ["t2", "a", "t1", "b"]
    )
    assert result.rows[1] == ["t2", "b", "t1", "a"]

    # No explicit direction.
    undirected = aggregations.AggregateRequest("*").sort_by("@t1")
    result = client.ft().aggregate(undirected)
    assert result.rows[0] == ["t1", "a"]
    assert result.rows[1] == ["t1", "b"]

    # The max argument caps the number of rows.
    capped = aggregations.AggregateRequest("*").sort_by("@t1", max=1)
    result = client.ft().aggregate(capped)
    assert len(result.rows) == 1

    # LIMIT offset=1, num=1 keeps only the second sorted row.
    limited = aggregations.AggregateRequest("*").sort_by("@t1").limit(1, 1)
    result = client.ft().aggregate(limited)
    assert len(result.rows) == 1
    assert result.rows[0] == ["t1", "b"]
def createIndex(client, num_docs=100, definition=None):
    """Create the play/txt/chapter index and load documents into it.

    Rows are read from the bz2-compressed, ';'-delimited file named by
    ``WILL_PLAY_TEXT`` and grouped into one document per ``play:chapter``
    key. Reading stops once ``num_docs`` distinct documents have been
    collected.

    :param client: search client exposing ``create_index``, ``dropindex``
        and ``batch_indexer``.
    :param num_docs: number of distinct documents at which reading stops.
    :param definition: optional index definition passed to ``create_index``.
    """
    try:
        client.create_index(
            (
                TextField("play", weight=5.0),
                TextField("txt"),
                NumericField("chapter"),
            ),
            definition=definition,
        )
    except redis.ResponseError:
        # Creation was rejected (presumably the index already exists): drop
        # it together with its documents and start over.
        client.dropindex(delete_documents=True)
        return createIndex(client, num_docs=num_docs, definition=definition)

    chapters = {}
    # The context manager closes the text wrapper and the underlying bz2
    # file even when the loop exits early or a row fails to parse.
    with TextIOWrapper(bz2.BZ2File(WILL_PLAY_TEXT), encoding="utf8") as bzfp:
        for line in csv.reader(bzfp, delimiter=";"):
            play, chapter, text = line[1], line[2], line[5]

            key = f"{play}:{chapter}".lower()
            d = chapters.setdefault(key, {})
            d["play"] = play
            # Successive rows of the same chapter are appended to its text.
            d["txt"] = d.get("txt", "") + " " + text
            d["chapter"] = int(chapter or 0)
            if len(chapters) == num_docs:
                break

    indexer = client.batch_indexer(chunk_size=50)
    assert isinstance(indexer, Search.BatchIndexer)
    assert 50 == indexer.chunk_size

    for key, doc in chapters.items():
        indexer.add_document(key, **doc)
    indexer.commit()
def test_no_index(client):
    """Check fields declared with no_index=True and sortable=True.

    The assertions expect such a field to return no search hits while still
    being usable for sorting, and expect field constructors to raise when
    given no_index=True together with sortable=False.
    """
    schema = (
        TextField("field"),
        TextField("text", no_index=True, sortable=True),
        NumericField("numeric", no_index=True, sortable=True),
        GeoField("geo", no_index=True, sortable=True),
        TagField("tag", no_index=True, sortable=True),
    )
    client.ft().create_index(schema)

    client.ft().add_document(
        "doc1", field="aaa", text="1", numeric="1", geo="1,1", tag="1"
    )
    client.ft().add_document(
        "doc2", field="aab", text="2", numeric="2", geo="2,2", tag="2"
    )
    waitForIndex(client, "idx")

    not_indexed = client.ft().search(Query("@text:aa*"))
    assert not_indexed.total == 0

    indexed = client.ft().search(Query("@field:aa*"))
    assert indexed.total == 2

    descending = client.ft().search(Query("*").sort_by("text", asc=False))
    assert descending.total == 2
    assert descending.docs[0].id == "doc2"

    # Ascending sort on every non-indexed field puts doc1 first.
    for sort_field in ("text", "numeric", "geo", "tag"):
        ascending = client.ft().search(
            Query("*").sort_by(sort_field, asc=True)
        )
        assert ascending.docs[0].id == "doc1"

    # Ensure exception is raised for non-indexable, non-sortable fields
    for field_type in (TextField, NumericField, GeoField, TagField):
        with pytest.raises(Exception):
            field_type("name", no_index=True, sortable=False)
def import_brewery_geo(r, rsclient):
    """Create the brewery search index and add one document per CSV row.

    For every data row of ``brewerygeofile`` the brewery hash is read from
    ``r`` and, when present, a document combining the hash fields with the
    row's coordinates is added through ``rsclient``. Rows whose hash is
    missing, or whose document cannot be added, are reported with ``print``
    and skipped.

    :param r: Redis client used to read the brewery hashes (``hgetall``);
        hash keys are read as bytes.
    :param rsclient: search client used for ``create_index`` and
        ``add_document``.
    """
    # create the brewery redisearch index
    ftidxfields = [
        TextField('name', weight=5.0),
        TextField('address'),
        TextField('city'),
        TextField('state'),
        TextField('country'),
        NumericField('id', sortable=True),
        GeoField('location'),
    ]
    rsclient.create_index(ftidxfields)

    # newline="" lets the csv module handle line endings itself, so quoted
    # fields containing newlines are parsed correctly.
    with open(brewerygeofile, newline="") as geofile:
        geo = csv.reader(geofile)
        # Skip the header record, even if it spans several physical lines.
        next(geo, None)
        for row in geo:
            # use the brewery id to generate the brewery key added earlier
            brewery_key = f"{brewery}:{row[1]}"

            # get all the data from the brewery hash
            binfo = r.hgetall(brewery_key)
            if not binfo:
                print(
                    f"\tERROR: Missing info for {brewery_key}, "
                    "skipping geo import"
                )
                continue

            # add the brewery document to the index
            ftaddfields = {
                'name': binfo[b'name'].decode(),
                'address': binfo[b'address1'].decode(),
                'city': binfo[b'city'].decode(),
                'state': binfo[b'state'].decode(),
                'country': binfo[b'country'].decode(),
                'id': row[1],
                # built from columns 3 then 2 of the CSV row
                'location': f"{row[3]},{row[2]}",
            }
            try:
                rsclient.add_document(
                    f"brewery:{row[1]}",
                    score=1.0,
                    replace=True,
                    partial=True,
                    **ftaddfields,
                )
            except Exception as e:
                # Best effort: report the failure and move on to the next row.
                print(
                    f"\tERROR: Failed to add document for {brewery_key}: {e}"
                )
def initialize():
    """Create the search index for keys prefixed with ``fts:``.

    Creation is best effort: any ordinary failure is logged and swallowed so
    that calling this function never raises. Unlike a bare ``except``, the
    handler lets ``KeyboardInterrupt`` and ``SystemExit`` propagate.
    """
    import logging

    try:
        definition = IndexDefinition(prefix=["fts:"])
        redis_conn.ft().create_index(
            (
                TextField("id"),
                TextField("name"),
                TextField("summary"),
                TextField("description"),
                TextField("keywords"),
            ),
            definition=definition,
        )
    except Exception:
        # Keep the original "never fail" contract, but leave a trace instead
        # of hiding the failure completely.
        logging.getLogger(__name__).info(
            "Search index creation skipped", exc_info=True
        )
def test_search_return_fields(client):
    """Return JSON paths under an alias, for indexed and non-indexed paths."""
    stored = client.json().set(
        "doc:1",
        Path.rootPath(),
        {
            "t": "riceratops",
            "t2": "telmatosaurus",
            "n": 9072,
            "flt": 97.2,
        },
    )
    assert stored

    # Only $.t and $.flt are part of the schema; $.t2 is not.
    json_definition = IndexDefinition(index_type=IndexType.JSON)
    SCHEMA = (
        TextField("$.t"),
        NumericField("$.flt"),
    )
    client.ft().create_index(SCHEMA, definition=json_definition)
    waitForIndex(client, "idx")

    # A path that is in the schema, returned as "txt".
    docs = client.ft().search(
        Query("*").return_field("$.t", as_field="txt")
    ).docs
    assert len(docs) == 1
    assert docs[0].id == "doc:1"
    assert docs[0].txt == "riceratops"

    # A path that is not in the schema, returned as "txt".
    docs = client.ft().search(
        Query("*").return_field("$.t2", as_field="txt")
    ).docs
    assert len(docs) == 1
    assert docs[0].id == "doc:1"
    assert docs[0].txt == "telmatosaurus"
def test_scorer(client):
    """Check the top score produced by the default and each named scorer."""
    client.ft().create_index((TextField("description"),))
    client.ft().add_document(
        "doc1", description="The quick brown fox jumps over the lazy dog"
    )
    client.ft().add_document(
        "doc2",
        description="Quick alice was beginning to get very tired of sitting by her quick sister on the bank, and of having nothing to do.",  # noqa
    )

    # default scorer is TFIDF
    default_result = client.ft().search(Query("quick").with_scores())
    assert default_result.docs[0].score == 1.0

    # Expected score of the first hit for each explicitly selected scorer,
    # checked in the same order as listed.
    expected_by_scorer = (
        ("TFIDF", 1.0),
        ("TFIDF.DOCNORM", 0.1111111111111111),
        ("BM25", 0.17699114465425977),
        ("DISMAX", 2.0),
        ("DOCSCORE", 1.0),
        ("HAMMING", 0.0),
    )
    for scorer_name, expected_score in expected_by_scorer:
        scored = client.ft().search(
            Query("quick").scorer(scorer_name).with_scores()
        )
        assert scored.docs[0].score == expected_score
def test_tags(client):
    """Check tag-field queries and the list of indexed tag values.

    The original final assertion compared the results of two ``list.sort()``
    calls, which are both ``None``, so it could never fail. It now compares
    the sorted expected tags with the sorted values returned by ``tagvals``.
    """
    client.ft().create_index((TextField("txt"), TagField("tags")))
    tags = "foo,foo bar,hello;world"
    tags2 = "soba,ramen"

    client.ft().add_document("doc1", txt="fooz barz", tags=tags)
    client.ft().add_document("doc2", txt="noodles", tags=tags2)
    waitForIndex(client, "idx")

    q = Query("@tags:{foo}")
    res = client.ft().search(q)
    assert 1 == res.total

    q = Query("@tags:{foo bar}")
    res = client.ft().search(q)
    assert 1 == res.total

    # Same tag with the space escaped.
    q = Query("@tags:{foo\\ bar}")
    res = client.ft().search(q)
    assert 1 == res.total

    # ';' is part of the tag value and is escaped in the query.
    q = Query("@tags:{hello\\;world}")
    res = client.ft().search(q)
    assert 1 == res.total

    q2 = client.ft().tagvals("tags")
    # sorted() returns the ordered lists; list.sort() would return None.
    assert sorted(tags.split(",") + tags2.split(",")) == sorted(q2)
def test_dict_operations(client):
    """Add, delete and dump entries of a custom dictionary."""
    client.ft().create_index((TextField("f1"), TextField("f2")))

    # Three terms go in; the call reports three.
    added = client.ft().dict_add("custom_dict", "item1", "item2", "item3")
    assert added == 3

    # One term comes out; the call reports one.
    removed = client.ft().dict_del("custom_dict", "item2")
    assert removed == 1

    # The dump holds the two remaining terms.
    remaining = client.ft().dict_dump("custom_dict")
    assert remaining == ["item1", "item3"]

    # Remove rest of the items before reload
    client.ft().dict_del("custom_dict", *remaining)
def test_create_json_with_alias(client):
    """
    Build an ON JSON index (IndexType.JSON) restricted to the "king:" prefix
    with two aliased fields, populate it through the JSON client, and query
    it by alias.
    """
    json_definition = IndexDefinition(
        prefix=["king:"], index_type=IndexType.JSON
    )
    schema = (
        TextField("$.name", as_name="name"),
        NumericField("$.num", as_name="num"),
    )
    client.ft().create_index(schema, definition=json_definition)

    client.json().set("king:1", Path.rootPath(), {"name": "henry", "num": 42})
    client.json().set(
        "king:2", Path.rootPath(), {"name": "james", "num": 3.14}
    )

    by_name = client.ft().search("@name:henry")
    assert by_name.docs[0].id == "king:1"
    assert by_name.docs[0].json == '{"name":"henry","num":42}'
    assert by_name.total == 1

    by_range = client.ft().search("@num:[0 10]")
    assert by_range.docs[0].id == "king:2"
    assert by_range.docs[0].json == '{"name":"james","num":3.14}'
    assert by_range.total == 1

    # Querying by the raw path (special characters) is expected to error;
    # callers should use the alias instead.
    with pytest.raises(Exception):
        client.ft().search("@$.name:henry")
def test_partial(client):
    """Compare updating a document with partial=True versus replace=True."""
    schema = (TextField("f1"), TextField("f2"), TextField("f3"))
    client.ft().create_index(schema)

    # Both documents start with f1 and f2 only.
    for doc_id in ("doc1", "doc2"):
        client.ft().add_document(doc_id, f1="f1_val", f2="f2_val")

    # doc1 gets f3 through a partial update, doc2 through a full replace.
    client.ft().add_document("doc1", f3="f3_val", partial=True)
    client.ft().add_document("doc2", f3="f3_val", replace=True)
    waitForIndex(client, "idx")

    # Search for f3 value. All documents should have it
    with_f3 = client.ft().search("@f3:f3_val")
    assert with_f3.total == 2

    # Only the document updated with PARTIAL should still have f1 and f2 values
    with_all_fields = client.ft().search("@f3:f3_val @f2:f2_val @f1:f1_val")
    assert with_all_fields.total == 1
def testMaxTextFields(client):
    """Check the text-field limit with and without max_text_fields.

    Without ``max_text_fields`` the index is grown to 32 text fields
    (f0..f31) and adding one more is expected to be rejected. The original
    check reused the leaked loop variable and therefore tried to add ``f31``
    a second time, so it could pass on a duplicate-field error rather than
    on the limit; it now adds a genuinely new field, ``f32``.
    """
    # Creating the index definition
    client.ft().create_index((TextField("f0"),))
    for x in range(1, 32):
        client.ft().alter_schema_add((TextField(f"f{x}"),))

    # One more distinct text field should exceed the limit.
    with pytest.raises(redis.ResponseError):
        client.ft().alter_schema_add((TextField("f32"),))

    client.ft().dropindex("idx")

    # Creating the index definition
    client.ft().create_index((TextField("f0"),), max_text_fields=True)
    # Fill the index with fields; none of these additions should raise.
    for x in range(1, 50):
        client.ft().alter_schema_add((TextField(f"f{x}"),))
def test_aggregations_apply(client):
    """Check APPLY by overwriting a numeric field with itself times ten."""
    schema = (
        TextField("PrimaryKey", sortable=True),
        NumericField("CreatedDateTimeUTC", sortable=True),
    )
    client.ft().create_index(schema)

    first_doc = {
        "PrimaryKey": "9::362330",
        "CreatedDateTimeUTC": "637387878524969984",
    }
    second_doc = {
        "PrimaryKey": "9::362329",
        "CreatedDateTimeUTC": "637387875859270016",
    }
    client.ft().client.hset("doc1", mapping=first_doc)
    client.ft().client.hset("doc2", mapping=second_doc)

    request = aggregations.AggregateRequest("*").apply(
        CreatedDateTimeUTC="@CreatedDateTimeUTC * 10"
    )
    result = client.ft().aggregate(request)

    # Rows are expected in insertion order, values as strings.
    assert result.rows[0] == ["CreatedDateTimeUTC", "6373878785249699840"]
    assert result.rows[1] == ["CreatedDateTimeUTC", "6373878758592700416"]
def test_example(client):
    """Smoke test: create an index, add one document, run a tuned query."""
    # Schema with a weighted title and a plain body.
    schema = (TextField("title", weight=5.0), TextField("body"))
    client.ft().create_index(schema)

    # One document to search against.
    client.ft().add_document(
        "doc1",
        title="RediSearch",
        body="Redisearch impements a search engine on top of redis",
    )

    # Verbatim query, ids only, first five hits.
    query = Query("search engine").verbatim().no_content().paging(0, 5)
    result = client.ft().search(query)
    assert result is not None
def test_payloads_with_no_content(client):
    """Search with payloads requested and document content suppressed."""
    client.ft().create_index((TextField("txt"),))

    client.ft().add_document("doc1", payload="foo baz", txt="foo bar")
    client.ft().add_document("doc2", payload="foo baz2", txt="foo bar")

    query = Query("foo bar").with_payloads().no_content()
    result = client.ft().search(query)

    # Both documents match the query.
    assert len(result.docs) == 2
async def test_payloads_with_no_content(modclient: redis.Redis):
    """Async: search with payloads requested and content suppressed."""
    await modclient.ft().create_index((TextField("txt"),))

    await modclient.ft().add_document(
        "doc1", payload="foo baz", txt="foo bar"
    )
    await modclient.ft().add_document(
        "doc2", payload="foo baz2", txt="foo bar"
    )

    query = Query("foo bar").with_payloads().no_content()
    result = await modclient.ft().search(query)

    # Both documents match the query.
    assert len(result.docs) == 2
def test_textfield_sortable_nostem(client):
    """Check that sortable and no_stem show up in the index info."""
    field = TextField("txt", sortable=True, no_stem=True)
    client.ft().create_index((field,))

    # The first (and only) attribute entry should carry both flags.
    info = client.ft().info()
    assert "SORTABLE" in info["attributes"][0]
    assert "NOSTEM" in info["attributes"][0]
def test_alter_schema_add(client):
    """Add a field to an existing index and search on the new field."""
    # The index starts with a title only; fields are passed unwrapped, as in
    # the original test.
    client.ft().create_index(TextField("title"))

    # Extend the schema with a body field.
    client.ft().alter_schema_add(TextField("body"))

    client.ft().add_document(
        "doc1", title="MyTitle", body="Some content only in the body"
    )

    # These terms appear only in the added body field.
    body_query = Query("only in the body")
    result = client.ft().search(body_query)
    assert result.total == 1
def test_get(client):
    """Fetch documents by id before and after they exist."""
    client.ft().create_index((TextField("f1"), TextField("f2")))

    # Unknown ids come back as None, one entry per requested id.
    assert client.ft().get("doc1") == [None]
    assert client.ft().get("doc2", "doc1") == [None, None]

    client.ft().add_document(
        "doc1", f1="some valid content dd1", f2="this is sample text ff1"
    )
    client.ft().add_document(
        "doc2", f1="some valid content dd2", f2="this is sample text ff2"
    )

    # Each document is expected as a flat [field, value, ...] list.
    expected_doc1 = [
        "f1", "some valid content dd1", "f2", "this is sample text ff1"
    ]
    expected_doc2 = [
        "f1", "some valid content dd2", "f2", "this is sample text ff2"
    ]
    assert client.ft().get("doc2") == [expected_doc2]
    assert client.ft().get("doc1", "doc2") == [expected_doc1, expected_doc2]
def test_phonetic_matcher(client):
    """Compare search results with and without a phonetic matcher."""
    # Plain text field: the query is expected to match only "Jon".
    client.ft().create_index((TextField("name"),))
    client.ft().add_document("doc1", name="Jon")
    client.ft().add_document("doc2", name="John")

    exact = client.ft().search(Query("Jon"))
    assert len(exact.docs) == 1
    assert exact.docs[0].name == "Jon"

    # Start over with the "dm:en" phonetic matcher on the field.
    client.flushdb()

    client.ft().create_index((TextField("name", phonetic_matcher="dm:en"),))
    client.ft().add_document("doc1", name="Jon")
    client.ft().add_document("doc2", name="John")

    phonetic = client.ft().search(Query("Jon"))
    assert len(phonetic.docs) == 2
    matched_names = sorted([doc.name for doc in phonetic.docs])
    assert matched_names == ["John", "Jon"]
async def test_get(modclient: redis.Redis):
    """Async: fetch documents by id before and after they exist."""
    await modclient.ft().create_index((TextField("f1"), TextField("f2")))

    # Unknown ids come back as None, one entry per requested id.
    assert await modclient.ft().get("doc1") == [None]
    assert await modclient.ft().get("doc2", "doc1") == [None, None]

    await modclient.ft().add_document(
        "doc1", f1="some valid content dd1", f2="this is sample text ff1"
    )
    await modclient.ft().add_document(
        "doc2", f1="some valid content dd2", f2="this is sample text ff2"
    )

    # Each document is expected as a flat [field, value, ...] list.
    expected_doc1 = [
        "f1", "some valid content dd1", "f2", "this is sample text ff1"
    ]
    expected_doc2 = [
        "f1", "some valid content dd2", "f2", "this is sample text ff2"
    ]
    assert await modclient.ft().get("doc2") == [expected_doc2]
    assert await modclient.ft().get("doc1", "doc2") == [
        expected_doc1,
        expected_doc2,
    ]
def test_scores(client):
    """Check ordering and score when the query has an optional term."""
    client.ft().create_index((TextField("txt"),))

    client.ft().add_document("doc1", txt="foo baz")
    client.ft().add_document("doc2", txt="foo bar")

    # "~bar" is written with the optional-term prefix; both docs contain
    # "foo", only doc2 contains "bar".
    scored_query = Query("foo ~bar").with_scores()
    result = client.ft().search(scored_query)

    assert result.total == 2
    top, runner_up = result.docs[0], result.docs[1]
    assert top.id == "doc2"
    assert top.score == 3.0
    assert runner_up.id == "doc1"
async def test_stopwords(modclient: redis.Redis):
    """Async: check that custom stopwords are ignored when searching."""
    custom_stopwords = ["foo", "bar", "baz"]
    await modclient.ft().create_index(
        (TextField("txt"),), stopwords=custom_stopwords
    )
    await modclient.ft().add_document("doc1", txt="foo bar")
    await modclient.ft().add_document("doc2", txt="hello world")
    await waitForIndex(modclient, "idx")

    only_stopwords = Query("foo bar").no_content()
    mixed_terms = Query("foo bar hello world").no_content()

    stopword_result = await modclient.ft().search(only_stopwords)
    mixed_result = await modclient.ft().search(mixed_terms)

    # A query of stopwords only matches nothing; the mixed one matches doc2.
    assert stopword_result.total == 0
    assert mixed_result.total == 1
async def test_scores(modclient: redis.Redis):
    """Async: check ordering and score with an optional query term."""
    await modclient.ft().create_index((TextField("txt"),))

    await modclient.ft().add_document("doc1", txt="foo baz")
    await modclient.ft().add_document("doc2", txt="foo bar")

    # "~bar" is written with the optional-term prefix; both docs contain
    # "foo", only doc2 contains "bar".
    scored_query = Query("foo ~bar").with_scores()
    result = await modclient.ft().search(scored_query)

    assert result.total == 2
    top, runner_up = result.docs[0], result.docs[1]
    assert top.id == "doc2"
    assert top.score == 3.0
    assert runner_up.id == "doc1"
def test_stopwords(client):
    """Check that custom stopwords are ignored when searching."""
    custom_stopwords = ["foo", "bar", "baz"]
    client.ft().create_index((TextField("txt"),), stopwords=custom_stopwords)
    client.ft().add_document("doc1", txt="foo bar")
    client.ft().add_document("doc2", txt="hello world")
    waitForIndex(client, "idx")

    only_stopwords = Query("foo bar").no_content()
    mixed_terms = Query("foo bar hello world").no_content()

    stopword_result = client.ft().search(only_stopwords)
    mixed_result = client.ft().search(mixed_terms)

    # A query of stopwords only matches nothing; the mixed one matches doc2.
    assert stopword_result.total == 0
    assert mixed_result.total == 1
def test_aggregations_load(client):
    """Check aggregate LOAD for a single field and for all fields."""
    schema = (
        TextField("t1"),
        TextField("t2"),
    )
    client.ft().create_index(schema)

    client.ft().client.hset("doc1", mapping={"t1": "hello", "t2": "world"})

    # Loading one named field returns just that field, t1 first, then t2.
    for field_name, field_value in (("t1", "hello"), ("t2", "world")):
        request = aggregations.AggregateRequest("*").load(field_name)
        result = client.ft().aggregate(request)
        assert result.rows[0] == [field_name, field_value]

    # load() without arguments returns every field.
    request = aggregations.AggregateRequest("*").load()
    result = client.ft().aggregate(request)
    assert result.rows[0] == ["t1", "hello", "t2", "world"]
def test_syndump(client):
    """Register three synonym groups and check the dumped term mapping."""
    hash_definition = IndexDefinition(index_type=IndexType.HASH)
    schema = (
        TextField("title"),
        TextField("body"),
    )
    client.ft().create_index(schema, definition=hash_definition)

    # "child" belongs to both id1 and id2.
    client.ft().synupdate("id1", False, "boy", "child", "offspring")
    client.ft().synupdate("id2", False, "baby", "child")
    client.ft().synupdate("id3", False, "tree", "wood")

    # The dump maps every term to the group ids it was registered under.
    expected_groups = {
        "boy": ["id1"],
        "child": ["id1", "id2"],
        "offspring": ["id1"],
        "baby": ["id2"],
        "tree": ["id3"],
        "wood": ["id3"],
    }
    dumped = client.ft().syndump()
    assert dumped == expected_groups
def test_payloads(client):
    """Check payload retrieval for documents with and without a payload."""
    client.ft().create_index((TextField("txt"),))

    # Only doc1 is stored with a payload.
    client.ft().add_document("doc1", payload="foo baz", txt="foo bar")
    client.ft().add_document("doc2", txt="foo bar")

    payload_query = Query("foo bar").with_payloads()
    result = client.ft().search(payload_query)

    assert result.total == 2
    assert result.docs[0].id == "doc1"
    assert result.docs[1].id == "doc2"
    assert result.docs[0].payload == "foo baz"
    assert result.docs[1].payload is None