def test_search_return_fields(client):
    """Verify RETURN with AS aliasing on JSON paths maps into doc attributes."""
    assert client.json().set(
        "doc:1",
        Path.rootPath(),
        {"t": "riceratops", "t2": "telmatosaurus", "n": 9072, "flt": 97.2},
    )

    # Index two of the JSON paths.
    definition = IndexDefinition(index_type=IndexType.JSON)
    schema = (TextField("$.t"), NumericField("$.flt"))
    client.ft().create_index(schema, definition=definition)
    waitForIndex(client, "idx")

    docs = client.ft().search(Query("*").return_field("$.t", as_field="txt")).docs
    assert len(docs) == 1
    assert docs[0].id == "doc:1"
    assert docs[0].txt == "riceratops"

    # A non-indexed path can still be returned and aliased.
    docs = client.ft().search(Query("*").return_field("$.t2", as_field="txt")).docs
    assert len(docs) == 1
    assert docs[0].id == "doc:1"
    assert docs[0].txt == "telmatosaurus"
def test_tags(client):
    """Tag-field queries, including escaped separators, plus TAGVALS listing."""
    client.ft().create_index((TextField("txt"), TagField("tags")))
    tags = "foo,foo bar,hello;world"
    tags2 = "soba,ramen"

    client.ft().add_document("doc1", txt="fooz barz", tags=tags)
    client.ft().add_document("doc2", txt="noodles", tags=tags2)
    waitForIndex(client, "idx")

    q = Query("@tags:{foo}")
    res = client.ft().search(q)
    assert 1 == res.total

    q = Query("@tags:{foo bar}")
    res = client.ft().search(q)
    assert 1 == res.total

    q = Query("@tags:{foo\\ bar}")
    res = client.ft().search(q)
    assert 1 == res.total

    q = Query("@tags:{hello\\;world}")
    res = client.ft().search(q)
    assert 1 == res.total

    q2 = client.ft().tagvals("tags")
    # BUG FIX: list.sort() sorts in place and returns None, so the original
    # ``(...).sort() == q2.sort()`` compared None == None and always passed.
    # Compare the actual sorted values instead.
    assert sorted(tags.split(",") + tags2.split(",")) == sorted(q2)
def test_json_with_jsonpath(client):
    """JSONPath fields: bracket notation is indexable, dotted colon-name is not."""
    definition = IndexDefinition(index_type=IndexType.JSON)
    client.ft().create_index(
        (
            TextField('$["prod:name"]', as_name="name"),
            TextField("$.prod:name", as_name="name_unsupported"),
        ),
        definition=definition,
    )
    client.json().set("doc:1", Path.rootPath(), {"prod:name": "RediSearch"})

    # Searching on the bracket-notation alias finds the document.
    res = client.ft().search(Query("@name:RediSearch"))
    assert res.total == 1
    assert res.docs[0].id == "doc:1"
    assert res.docs[0].json == '{"prod:name":"RediSearch"}'

    # Searching on the unsupported dotted path matches nothing.
    res = client.ft().search("@name_unsupported:RediSearch")
    assert res.total == 0

    # Returning the supported alias exposes it as a doc attribute.
    res = client.ft().search(Query("@name:RediSearch").return_field("name"))
    assert res.total == 1
    assert res.docs[0].id == "doc:1"
    assert res.docs[0].name == "RediSearch"

    # Returning the unsupported alias yields no such attribute.
    res = client.ft().search(
        Query("@name:RediSearch").return_field("name_unsupported")
    )
    assert res.total == 1
    assert res.docs[0].id == "doc:1"
    with pytest.raises(Exception):
        res.docs[0].name_unsupported
def search(search_term: str):
    """Search the index for *search_term*; return matching doc ids or None."""
    # TODO: figure out how to escape dashes
    # "D-Feet" seems to be interpreted as "d and not feet"
    search_term = search_term.replace("-", " ")
    # This seems to confuse redis too
    search_term = search_term.replace(".*", "*")

    # Strip characters that are reserved in the redis query syntax.
    for reserved in "@!{}()|-=>[]:;*":
        search_term = search_term.replace(reserved, "")

    if not search_term.strip():
        return None

    # TODO: should input be sanitized here?
    generic_query = Query(build_query(f"{search_term}*")).no_content()
    # TODO: Backend API doesn't support paging so bring twohundredfifty results
    # instead of just 10, which is the redis default
    generic_query.paging(0, 250)

    results = [doc.id for doc in redis_conn.ft().search(generic_query).docs]

    # Fall back to fuzzy matching when the prefix query found nothing.
    # redis does not support fuzzy search for non-alphabet strings
    if not results:
        if not search_term.isalpha():
            return None
        fuzzy_query = Query(build_query(f"%{search_term}%")).no_content()
        results = [doc.id for doc in redis_conn.ft().search(fuzzy_query).docs]

    return results
async def test_stopwords(modclient: redis.Redis):
    """Custom stopwords are dropped from queries at search time."""
    words = ["foo", "bar", "baz"]
    await modclient.ft().create_index((TextField("txt"),), stopwords=words)
    await modclient.ft().add_document("doc1", txt="foo bar")
    await modclient.ft().add_document("doc2", txt="hello world")
    await waitForIndex(modclient, "idx")

    # A query made solely of stopwords matches nothing;
    # non-stopword terms still match normally.
    all_stop = Query("foo bar").no_content()
    mixed = Query("foo bar hello world").no_content()
    res1 = await modclient.ft().search(all_stop)
    res2 = await modclient.ft().search(mixed)
    assert res1.total == 0
    assert res2.total == 1
def test_stopwords(client):
    """Custom stopwords are dropped from queries at search time."""
    client.ft().create_index((TextField("txt"),), stopwords=["foo", "bar", "baz"])
    client.ft().add_document("doc1", txt="foo bar")
    client.ft().add_document("doc2", txt="hello world")
    waitForIndex(client, "idx")

    # Only stopwords -> no hits; mixed terms -> the non-stopwords match.
    all_stop = Query("foo bar").no_content()
    mixed = Query("foo bar hello world").no_content()
    res1 = client.ft().search(all_stop)
    res2 = client.ft().search(mixed)
    assert res1.total == 0
    assert res2.total == 1
def index():
    """Render the home page listing beers with ABV in [2, 7] and IBU >= 1."""
    q = Query('+@abv:[2 7] +@ibu:[1 +inf]')
    result = g.rsbeer.search(q)
    return render_template(
        'index.html',
        title='Home',
        count=result.total,
        duration=result.duration,
        rsindex=g.rsbeer.info()['index_name'],
        rsquery=q.query_string(),
        result=docs_to_dict(result.docs),
    )
def test_payloads_with_no_content(client):
    """NOCONTENT + WITHPAYLOADS still returns one entry per matching doc."""
    client.ft().create_index((TextField("txt"),))
    client.ft().add_document("doc1", payload="foo baz", txt="foo bar")
    client.ft().add_document("doc2", payload="foo baz2", txt="foo bar")

    query = Query("foo bar").with_payloads().no_content()
    result = client.ft().search(query)
    assert len(result.docs) == 2
async def test_payloads_with_no_content(modclient: redis.Redis):
    """NOCONTENT + WITHPAYLOADS still returns one entry per matching doc."""
    await modclient.ft().create_index((TextField("txt"),))
    await modclient.ft().add_document("doc1", payload="foo baz", txt="foo bar")
    await modclient.ft().add_document("doc2", payload="foo baz2", txt="foo bar")

    query = Query("foo bar").with_payloads().no_content()
    result = await modclient.ft().search(query)
    assert len(result.docs) == 2
def test_scorer(client):
    """Each built-in scorer yields its expected score for the first hit."""
    client.ft().create_index((TextField("description"),))
    client.ft().add_document(
        "doc1", description="The quick brown fox jumps over the lazy dog"
    )
    client.ft().add_document(
        "doc2",
        description="Quick alice was beginning to get very tired of sitting by her quick sister on the bank, and of having nothing to do.",  # noqa
    )

    # TFIDF is the default scorer.
    res = client.ft().search(Query("quick").with_scores())
    assert res.docs[0].score == 1.0

    # Exercise each named scorer in turn.
    expected = [
        ("TFIDF", 1.0),
        ("TFIDF.DOCNORM", 0.1111111111111111),
        ("BM25", 0.17699114465425977),
        ("DISMAX", 2.0),
        ("DOCSCORE", 1.0),
        ("HAMMING", 0.0),
    ]
    for scorer, score in expected:
        res = client.ft().search(Query("quick").scorer(scorer).with_scores())
        assert res.docs[0].score == score
async def process(self, name):
    """Return the names of chromosomes matching *name* from the chromosome index."""
    index = AsyncSearch(self.redis_connection, index_name='chromosomeIdx')
    # Restrict the match to the name field and only fetch that field back.
    query = Query(name).limit_fields('name').return_fields('name')
    result = await index.search(query)
    return [doc.name for doc in result.docs]
def test_phonetic_matcher(client):
    """Without a phonetic matcher only exact names match; with dm:en both do."""
    client.ft().create_index((TextField("name"),))
    client.ft().add_document("doc1", name="Jon")
    client.ft().add_document("doc2", name="John")

    res = client.ft().search(Query("Jon"))
    assert len(res.docs) == 1
    assert res.docs[0].name == "Jon"

    # Rebuild the index with a double-metaphone (English) phonetic matcher.
    client.flushdb()
    client.ft().create_index((TextField("name", phonetic_matcher="dm:en"),))
    client.ft().add_document("doc1", name="Jon")
    client.ft().add_document("doc2", name="John")

    res = client.ft().search(Query("Jon"))
    assert len(res.docs) == 2
    assert sorted(d.name for d in res.docs) == ["John", "Jon"]
def test_sort_by(client):
    """SORTBY on a sortable numeric field orders ascending and descending."""
    client.ft().create_index(
        (TextField("txt"), NumericField("num", sortable=True))
    )
    client.ft().add_document("doc1", txt="foo bar", num=1)
    client.ft().add_document("doc2", txt="foo baz", num=2)
    client.ft().add_document("doc3", txt="foo qux", num=3)

    ascending = client.ft().search(
        Query("foo").sort_by("num", asc=True).no_content()
    )
    descending = client.ft().search(
        Query("foo").sort_by("num", asc=False).no_content()
    )

    assert ascending.total == 3
    assert [d.id for d in ascending.docs] == ["doc1", "doc2", "doc3"]
    assert descending.total == 3
    assert [d.id for d in descending.docs] == ["doc3", "doc2", "doc1"]
async def test_filters(modclient: redis.Redis):
    """Numeric and geo query filters select the expected documents."""
    await modclient.ft().create_index(
        (TextField("txt"), NumericField("num"), GeoField("loc"))
    )
    await modclient.ft().add_document(
        "doc1", txt="foo bar", num=3.141, loc="-0.441,51.458"
    )
    await modclient.ft().add_document("doc2", txt="foo baz", num=2, loc="-0.1,51.2")
    await waitForIndex(modclient, "idx")

    # Numeric: [0, 2] catches doc2; (2, +inf] catches doc1.
    in_range = Query("foo").add_filter(NumericFilter("num", 0, 2)).no_content()
    above_two = (
        Query("foo")
        .add_filter(NumericFilter("num", 2, NumericFilter.INF, minExclusive=True))
        .no_content()
    )
    res1 = await modclient.ft().search(in_range)
    res2 = await modclient.ft().search(above_two)
    assert res1.total == 1
    assert res2.total == 1
    assert res1.docs[0].id == "doc2"
    assert res2.docs[0].id == "doc1"

    # Geo: a 10 km radius finds only doc1; 100 km finds both.
    near = Query("foo").add_filter(GeoFilter("loc", -0.44, 51.45, 10)).no_content()
    far = Query("foo").add_filter(GeoFilter("loc", -0.44, 51.45, 100)).no_content()
    res1 = await modclient.ft().search(near)
    res2 = await modclient.ft().search(far)
    assert res1.total == 1
    assert res2.total == 2
    assert res1.docs[0].id == "doc1"

    # Order may change after an RDB reload, so compare sorted ids.
    assert sorted([res2.docs[0].id, res2.docs[1].id]) == ["doc1", "doc2"]
def test_scores(client):
    """Optional-term queries rank docs containing the optional term higher."""
    client.ft().create_index((TextField("txt"),))
    client.ft().add_document("doc1", txt="foo baz")
    client.ft().add_document("doc2", txt="foo bar")

    result = client.ft().search(Query("foo ~bar").with_scores())
    assert result.total == 2
    # doc2 contains the optional "bar" and therefore scores highest.
    assert result.docs[0].id == "doc2"
    assert result.docs[0].score == 3.0
    assert result.docs[1].id == "doc1"
async def test_scores(modclient: redis.Redis):
    """Optional-term queries rank docs containing the optional term higher."""
    await modclient.ft().create_index((TextField("txt"),))
    await modclient.ft().add_document("doc1", txt="foo baz")
    await modclient.ft().add_document("doc2", txt="foo bar")

    result = await modclient.ft().search(Query("foo ~bar").with_scores())
    assert result.total == 2
    # doc2 contains the optional "bar" and therefore scores highest.
    assert result.docs[0].id == "doc2"
    assert result.docs[0].score == 3.0
    assert result.docs[1].id == "doc1"
def test_no_index(client):
    """no_index fields are sortable but excluded from full-text matching."""
    client.ft().create_index(
        (
            TextField("field"),
            TextField("text", no_index=True, sortable=True),
            NumericField("numeric", no_index=True, sortable=True),
            GeoField("geo", no_index=True, sortable=True),
            TagField("tag", no_index=True, sortable=True),
        )
    )
    client.ft().add_document(
        "doc1", field="aaa", text="1", numeric="1", geo="1,1", tag="1"
    )
    client.ft().add_document(
        "doc2", field="aab", text="2", numeric="2", geo="2,2", tag="2"
    )
    waitForIndex(client, "idx")

    # The no_index field is not searchable, while the indexed one is.
    assert client.ft().search(Query("@text:aa*")).total == 0
    assert client.ft().search(Query("@field:aa*")).total == 2

    # ...but every no_index field can still drive sorting.
    res = client.ft().search(Query("*").sort_by("text", asc=False))
    assert res.total == 2
    assert res.docs[0].id == "doc2"
    for sort_field in ("text", "numeric", "geo", "tag"):
        res = client.ft().search(Query("*").sort_by(sort_field, asc=True))
        assert res.docs[0].id == "doc1"

    # A field that is neither indexed nor sortable must be rejected.
    for factory in (TextField, NumericField, GeoField, TagField):
        with pytest.raises(Exception):
            factory("name", no_index=True, sortable=False)
def test_payloads(client):
    """with_payloads surfaces per-document payloads (None when absent)."""
    client.ft().create_index((TextField("txt"),))
    client.ft().add_document("doc1", payload="foo baz", txt="foo bar")
    client.ft().add_document("doc2", txt="foo bar")

    result = client.ft().search(Query("foo bar").with_payloads())
    assert result.total == 2
    assert result.docs[0].id == "doc1"
    assert result.docs[1].id == "doc2"
    assert result.docs[0].payload == "foo baz"
    # doc2 was added without a payload.
    assert result.docs[1].payload is None
async def _queryToChromosomeGeneMatchIndexes(self, query_track, gene_index):
    """Map each chromosome to the indexes of its genes whose family occurs in
    *query_track* (empty family names are ignored)."""
    families = set(query_track)
    families.discard('')
    chromosome_match_indices = defaultdict(list)
    for family in families:
        # First pass: only count how many genes belong to this family.
        count_query = (
            Query(family)
            .limit_fields('family')
            .verbatim()
            .paging(0, 0)
        )
        num_genes = (await gene_index.search(count_query)).total
        # Second pass: fetch all of them now that the count is known.
        fetch_query = (
            Query(family)
            .limit_fields('family')
            .verbatim()
            .return_fields('chromosome', 'index')
            .paging(0, num_genes)
        )
        result = await gene_index.search(fetch_query)
        for doc in result.docs:
            chromosome_match_indices[doc.chromosome].append(int(doc.index))
    return chromosome_match_indices
def test_profile_limited(client):
    """FT.PROFILE LIMITED collapses child iterator details into counts."""
    client.ft().create_index((TextField("t"),))
    client.ft().client.hset("1", "t", "hello")
    client.ft().client.hset("2", "t", "hell")
    client.ft().client.hset("3", "t", "help")
    client.ft().client.hset("4", "t", "helowa")

    query = Query("%hell% hel*")
    res, details = client.ft().profile(query, limited=True)

    children = details["Iterators profile"]["Child iterators"]
    assert children[0]["Child iterators"] == "The number of iterators in the union is 3"
    assert children[1]["Child iterators"] == "The number of iterators in the union is 4"
    assert details["Iterators profile"]["Type"] == "INTERSECT"
    # Sanity-check the accompanying search result too.
    assert len(res.docs) == 3
def test_example(client):
    """End-to-end smoke test: create index, add a doc, run a complex query."""
    # Creating the index definition and schema.
    client.ft().create_index(
        (TextField("title", weight=5.0), TextField("body"))
    )
    # Indexing a document.
    client.ft().add_document(
        "doc1",
        title="RediSearch",
        body="Redisearch impements a search engine on top of redis",
    )
    # Searching with complex parameters.
    query = Query("search engine").verbatim().no_content().paging(0, 5)
    assert client.ft().search(query) is not None
def test_alter_schema_add(client):
    """Fields added via alter_schema_add are searchable afterwards."""
    client.ft().create_index(TextField("title"))
    client.ft().alter_schema_add(TextField("body"))
    client.ft().add_document(
        "doc1", title="MyTitle", body="Some content only in the body"
    )
    # The query text exists only in the newly added body field.
    result = client.ft().search(Query("only in the body"))
    assert result.total == 1
def test_summarize(client):
    """Summarize/highlight produce tagged fragments, explicit and default forms."""
    createIndex(client.ft())
    waitForIndex(client, "idx")

    # Explicit highlight tags and an explicit summarize field.
    query = Query("king henry").paging(0, 1)
    query.highlight(fields=("play", "txt"), tags=("<b>", "</b>"))
    query.summarize("txt")

    doc = sorted(client.ft().search(query).docs)[0]
    assert doc.play == "<b>Henry</b> IV"
    assert (
        doc.txt
        == "ACT I SCENE I. London. The palace. Enter <b>KING</b> <b>HENRY</b>, LORD JOHN OF LANCASTER, the EARL of WESTMORELAND, SIR... "  # noqa
    )

    # Default summarize/highlight behaviour.
    query = Query("king henry").paging(0, 1).summarize().highlight()
    doc = sorted(client.ft().search(query).docs)[0]
    assert doc.play == "<b>Henry</b> ... "
    assert (
        doc.txt
        == "ACT I SCENE I. London. The palace. Enter <b>KING</b> <b>HENRY</b>, LORD JOHN OF LANCASTER, the EARL of WESTMORELAND, SIR... "  # noqa
    )
def test_fields_as_name(client):
    """JSON path fields aliased with as_name are returned under the alias."""
    schema = (
        TextField("$.name", sortable=True, as_name="name"),
        NumericField("$.age", as_name="just_a_number"),
    )
    definition = IndexDefinition(index_type=IndexType.JSON)
    client.ft().create_index(schema, definition=definition)

    # Insert the JSON document to index.
    assert client.json().set("doc:1", Path.rootPath(), {"name": "Jon", "age": 25})

    docs = client.ft().search(
        Query("Jon").return_fields("name", "just_a_number")
    ).docs
    assert len(docs) == 1
    assert docs[0].id == "doc:1"
    assert docs[0].name == "Jon"
    # Numeric values come back as strings.
    assert docs[0].just_a_number == "25"
def test_profile(client):
    """FT.PROFILE returns iterator details for both Query and AggregateRequest."""
    client.ft().create_index((TextField("t"),))
    client.ft().client.hset("1", "t", "hello")
    client.ft().client.hset("2", "t", "world")

    # Profile a plain search query.
    query = Query("hello|world").no_content()
    res, details = client.ft().profile(query)
    assert details["Iterators profile"]["Counter"] == 2.0
    assert len(details["Iterators profile"]["Child iterators"]) == 2
    assert details["Iterators profile"]["Type"] == "UNION"
    assert details["Parsing time"] < 0.5
    assert len(res.docs) == 2  # sanity-check the search result too

    # Profile an aggregation.
    req = (
        aggregations.AggregateRequest("*")
        .load("t")
        .apply(prefix="startswith(@t, 'hel')")
    )
    res, details = client.ft().profile(req)
    assert details["Iterators profile"]["Counter"] == 2.0
    assert details["Iterators profile"]["Type"] == "WILDCARD"
    assert details["Parsing time"] < 0.5
    assert len(res.rows) == 2  # sanity-check the aggregation result too
def test_synupdate(client):
    """Synonym groups expand queries when using the SYNONYM expander."""
    definition = IndexDefinition(index_type=IndexType.HASH)
    client.ft().create_index(
        (TextField("title"), TextField("body")),
        definition=definition,
    )

    # doc1 is indexed before "baby" joins the synonym group, doc2 after.
    client.ft().synupdate("id1", True, "boy", "child", "offspring")
    client.ft().add_document("doc1", title="he is a baby", body="this is a test")

    client.ft().synupdate("id1", True, "baby")
    client.ft().add_document("doc2", title="he is another baby", body="another test")

    res = client.ft().search(Query("child").expander("SYNONYM"))
    assert res.docs[0].id == "doc2"
    assert res.docs[0].title == "he is another baby"
    assert res.docs[0].body == "another test"
def testSkipInitialScan(client):
    """skip_initial_scan leaves pre-existing hashes out of the new index."""
    client.hset("doc1", "foo", "bar")
    query = Query("@foo:bar")
    client.ft().create_index((TextField("foo"),), skip_initial_scan=True)
    assert client.ft().search(query).total == 0
async def _getTargets(self, targets, chromosome, matched, intermediate):
    """Return the names of chromosomes containing a sufficiently large,
    sufficiently dense block of genes whose families occur in *chromosome*
    (a sequence of family names).

    All gene-index queries are funnelled through a single Redis pipeline:
    phase 1 counts matches per family, phase 2 fetches them, then the
    per-chromosome index lists are scanned for qualifying blocks.

    ``matched`` and ``intermediate`` act as absolute counts when >= 1 and as
    fractions of ``len(chromosome)`` when < 1.
    """
    # use a pipeline to reduce the number of calls to database
    pipeline = self.redis_connection.pipeline()
    gene_index = CustomAsyncSearch(pipeline, index_name='geneIdx')
    # get genes for each family and bin them by chromosome
    families = set(chromosome)
    families.discard('')  # ignore empty family names
    chromosome_match_indices = defaultdict(list)
    # count how many genes are in each family
    query_strings = []
    count_queries = []
    for family in families:
        query_string = f'(@family:{family})'
        # limit the genes to the target chromosomes
        if targets:
            query_string += \
                '(' + \
                ' | '.join(map(lambda name: f'@chromosome:{name}', targets)) + \
                ')'
        query_strings.append(query_string)
        # count how many genes are in the family (paging(0, 0) fetches no docs)
        query = Query(query_string)\
            .verbatim()\
            .paging(0, 0)
        count_queries.append(query)
        await gene_index.search(query)  # returns the pipeline, not a Result!
    count_results = await pipeline.execute()
    # get the genes for each family
    gene_queries = []
    for family, query_string, query, res in zip(families, query_strings,
                                                count_queries, count_results):
        result = gene_index.search_result(query, res)
        num_genes = result.total
        # get the genes, now sized to fetch every match found by the count
        query = Query(query_string)\
            .verbatim()\
            .return_fields('chromosome', 'index')\
            .paging(0, num_genes)
        gene_queries.append(query)
        await gene_index.search(query)  # returns the pipeline, not a Result!
    gene_results = await pipeline.execute()
    # bin the genes by chromosome
    for query, res in zip(gene_queries, gene_results):
        result = gene_index.search_result(query, res)
        for d in result.docs:
            chromosome_match_indices[d.chromosome].append(int(d.index))
    # sort index lists and filter by match and intermediate parameters
    filtered_targets = []
    for name in chromosome_match_indices:
        num_genes = len(chromosome_match_indices[name])
        # there's not enough matches on the entire chromosome
        if num_genes < matched:
            continue
        # check blocks of genes that are closes
        indices = sorted(chromosome_match_indices[name])
        block = [indices[0]]
        for j, i in enumerate(indices[1:]):
            # match is close enough to previous match to add to block
            if (intermediate < 1 and (i-block[-1])/len(chromosome) <= intermediate) \
                    or (intermediate >= 1 and i-block[-1] <= intermediate-1):
                block.append(i)
            # match is too far away from previous match
            else:
                # save block if it's big enough
                if (matched < 1 and len(block)/len(chromosome) >= matched) or \
                        (matched >= 1 and len(block) >= matched):
                    filtered_targets.append(name)
                    break
                # start a new block with the current match
                block = [i]
                # no need to compute more blocks if none will be large enough
                if num_genes - j < matched:
                    break
        # save last block if it's big enough
        # NOTE(review): due to Python precedence this parses as
        # A or (B and C) — when matched < 1 the duplicate-name guard
        # (not filtered_targets or filtered_targets[-1] != name) is skipped,
        # which could append `name` twice. Presumably (A or B) and C was
        # intended — confirm against callers before changing.
        if (matched < 1 and len(block)/len(chromosome) >= matched) or \
                (matched >= 1 and len(block) >= matched) and \
                (not filtered_targets or filtered_targets[-1] != name):
            filtered_targets.append(name)
    return filtered_targets
def test_client(client):
    """Broad end-to-end coverage: info, search, verbatim, field limiting,
    document loading, key limiting, slop/order, and document deletion."""
    num_docs = 500
    createIndex(client.ft(), num_docs=num_docs)
    waitForIndex(client, "idx")

    # FT.INFO exposes the expected statistics.
    info = client.ft().info()
    expected_keys = (
        "index_name", "index_options", "attributes", "num_docs",
        "max_doc_id", "num_terms", "num_records", "inverted_sz_mb",
        "offset_vectors_sz_mb", "doc_table_size_mb", "key_table_size_mb",
        "records_per_doc_avg", "bytes_per_record_avg",
        "offsets_per_term_avg", "offset_bits_per_record_avg",
    )
    for key in expected_keys:
        assert key in info
    assert client.ft().index_name == info["index_name"]
    assert int(info["num_docs"]) == num_docs

    # A plain string query returns a Result with populated docs.
    res = client.ft().search("henry iv")
    assert isinstance(res, Result)
    assert res.total == 225
    assert len(res.docs) == 10
    assert res.duration > 0
    for doc in res.docs:
        assert doc.id
        assert doc.play == "Henry IV"
        assert len(doc.txt) > 0

    # NOCONTENT omits the field data.
    res = client.ft().search(Query("king").no_content())
    assert res.total == 194
    assert len(res.docs) == 10
    for doc in res.docs:
        assert "txt" not in doc.__dict__
        assert "play" not in doc.__dict__

    # Verbatim disables stemming, so it matches fewer documents.
    total = client.ft().search(Query("kings").no_content()).total
    vtotal = client.ft().search(Query("kings").no_content().verbatim()).total
    assert total > vtotal

    # INFIELDS restricts matching to the given fields.
    txt_total = client.ft().search(
        Query("henry").no_content().limit_fields("txt")).total
    play_total = client.ft().search(
        Query("henry").no_content().limit_fields("play")).total
    both_total = client.ft().search(
        Query("henry").no_content().limit_fields("play", "txt")).total
    assert txt_total == 129
    assert play_total == 494
    assert both_total == 494

    # load_document fetches a single document by id.
    doc = client.ft().load_document("henry vi part 3:62")
    assert doc is not None
    assert doc.id == "henry vi part 3:62"
    assert doc.play == "Henry VI Part 3"
    assert len(doc.txt) > 0

    # INKEYS limits results to an explicit id subset.
    ids = [d.id for d in client.ft().search(Query("henry")).docs]
    assert len(ids) == 10
    subset = ids[:5]
    docs = client.ft().search(Query("henry").limit_ids(*subset))
    assert docs.total == len(subset)
    assert set(d.id for d in docs.docs) == set(subset)

    # SLOP and INORDER constrain term proximity and ordering.
    assert client.ft().search(Query("henry king")).total == 193
    assert client.ft().search(Query("henry king").slop(0).in_order()).total == 3
    assert client.ft().search(Query("king henry").slop(0).in_order()).total == 52
    assert client.ft().search(Query("henry king").slop(0)).total == 53
    assert client.ft().search(Query("henry king").slop(100)).total == 167

    # Deleting a document removes it from the index; deletes are idempotent.
    client.ft().add_document("doc-5ghs2", play="Death of a Salesman")
    assert client.ft().search(Query("death of a salesman")).total == 1
    assert client.ft().delete_document("doc-5ghs2") == 1
    assert client.ft().search(Query("death of a salesman")).total == 0
    assert client.ft().delete_document("doc-5ghs2") == 0

    client.ft().add_document("doc-5ghs2", play="Death of a Salesman")
    assert client.ft().search(Query("death of a salesman")).total == 1
    client.ft().delete_document("doc-5ghs2")
def testSummarizeDisabled_nohl(client):
    """summarize() must fail on an index created with highlighting disabled."""
    client.ft().create_index((TextField("txt"),), no_highlight=True)
    client.ft().add_document("doc1", txt="foo bar")
    with pytest.raises(Exception):
        client.ft().search(Query("foo").summarize(fields=["txt"]))