Example #1
0
    def __setupIndexes(self, chunk_size):
        '''
        Creates (or fetches) the RediSearch indexes for chromosomes and genes
        and records the loader's schema version information in Redis.

        Parameters:
          chunk_size (int): The chunk size forwarded to __makeOrGetIndex for
            Redis batch processing.

        Returns:
          tuple: The value returned by __makeOrGetIndex for the chromosome
            index followed by the value returned for the gene index
            (presumably redis.commands.search.Search.BatchIndexer instances).
        '''

        # chromosome index over keys prefixed with "chromosome:"
        chromosome_schema = [
            TextField('name'),
            NumericField('length'),
            TextField('genus'),
            TextField('species'),
        ]
        chromosome_indexer = self.__makeOrGetIndex(
            CHROMOSOME_INDEX_NAME,
            chromosome_schema,
            IndexDefinition(prefix=['chromosome:']),
            chunk_size,
        )

        # check for non-RediSearch chromosome keys (dropped if necessary);
        # this must run after the chromosome index exists and before the gene
        # index is created, as in the original ordering
        self.__checkChromosomeKeys()

        # gene index over keys prefixed with "gene:"; only 'index' is sortable
        gene_schema = [
            TextField('chromosome'),
            TextField('name'),
            NumericField('fmin'),
            NumericField('fmax'),
            TextField('family'),
            NumericField('strand'),
            NumericField('index', sortable=True),
        ]
        gene_indexer = self.__makeOrGetIndex(
            GENE_INDEX_NAME,
            gene_schema,
            IndexDefinition(prefix=['gene:']),
            chunk_size,
        )

        # store the schema version, then rebuild the compatible-versions set
        # from scratch so stale members do not survive
        self.redis_connection.set(
            VERSION_KEY,
            redis_loader.__schema_version__,
        )
        self.redis_connection.delete(COMPATIBLE_KEY)
        compatible_versions = redis_loader.__compatible_schema_versions__
        self.redis_connection.sadd(COMPATIBLE_KEY, *compatible_versions)

        return chromosome_indexer, gene_indexer
Example #2
0
def test_no_index(client):
    """Check that ``no_index`` fields can be sorted on but not searched.

    Also checks that constructing a field that is neither indexed nor
    sortable raises an exception.
    """
    schema = (
        TextField("field"),
        TextField("text", no_index=True, sortable=True),
        NumericField("numeric", no_index=True, sortable=True),
        GeoField("geo", no_index=True, sortable=True),
        TagField("tag", no_index=True, sortable=True),
    )
    client.ft().create_index(schema)

    documents = (
        ("doc1", {"field": "aaa", "text": "1", "numeric": "1",
                  "geo": "1,1", "tag": "1"}),
        ("doc2", {"field": "aab", "text": "2", "numeric": "2",
                  "geo": "2,2", "tag": "2"}),
    )
    for doc_id, fields in documents:
        client.ft().add_document(doc_id, **fields)
    waitForIndex(client, "idx")

    def run(query):
        # every search goes through a fresh client.ft() handle
        return client.ft().search(query)

    # the non-indexed text field yields no hits; the indexed one yields both
    assert 0 == run(Query("@text:aa*")).total
    assert 2 == run(Query("@field:aa*")).total

    # descending sort on a non-indexed but sortable field
    descending = run(Query("*").sort_by("text", asc=False))
    assert 2 == descending.total
    assert "doc2" == descending.docs[0].id

    # ascending sort works for every sortable, non-indexed field type
    for sort_field in ("text", "numeric", "geo", "tag"):
        ascending = run(Query("*").sort_by(sort_field, asc=True))
        assert "doc1" == ascending.docs[0].id

    # Ensure exception is raised for non-indexable, non-sortable fields
    for field_cls in (TextField, NumericField, GeoField, TagField):
        with pytest.raises(Exception):
            field_cls("name", no_index=True, sortable=False)
Example #3
0
def createIndex(client, num_docs=100, definition=None):
    """Create the play/txt/chapter index and load documents from the corpus.

    The corpus is the bz2-compressed, ``;``-delimited file at
    ``WILL_PLAY_TEXT``.  Rows are grouped into one document per
    ``"<play>:<chapter>"`` key (lower-cased); reading stops as soon as
    ``num_docs`` distinct documents have been collected.

    Args:
        client: Search client exposing ``create_index``, ``dropindex`` and
            ``batch_indexer``.
        num_docs: Number of distinct documents at which reading the corpus
            stops.
        definition: Optional index definition forwarded to ``create_index``.

    Raises:
        AssertionError: If the batch indexer is not a ``Search.BatchIndexer``
            or does not report the requested chunk size.
    """
    try:
        client.create_index(
            (
                TextField("play", weight=5.0),
                TextField("txt"),
                NumericField("chapter"),
            ),
            definition=definition,
        )
    except redis.ResponseError:
        # Presumably the index already exists: drop it together with its
        # documents and start over.
        client.dropindex(delete_documents=True)
        return createIndex(client, num_docs=num_docs, definition=definition)

    chapters = {}
    # The context manager closes the text wrapper (and the BZ2 file under it)
    # even when we break out early or a row is malformed.
    with TextIOWrapper(bz2.BZ2File(WILL_PLAY_TEXT), encoding="utf8") as bzfp:
        for line in csv.reader(bzfp, delimiter=";"):
            play, chapter, text = line[1], line[2], line[5]

            key = f"{play}:{chapter}".lower()
            d = chapters.setdefault(key, {})
            d["play"] = play
            # accumulate every text fragment belonging to the same chapter
            d["txt"] = d.get("txt", "") + " " + text
            d["chapter"] = int(chapter or 0)
            if len(chapters) == num_docs:
                break

    indexer = client.batch_indexer(chunk_size=50)
    assert isinstance(indexer, Search.BatchIndexer)
    assert 50 == indexer.chunk_size

    for key, doc in chapters.items():
        indexer.add_document(key, **doc)
    indexer.commit()
Example #4
0
def test_create_json_with_alias(client):
    """
    Build an ON JSON index (IndexType.JSON) over the ``king:`` prefix with two
    aliased fields, then query it by alias through the search client.
    """
    schema = (
        TextField("$.name", as_name="name"),
        NumericField("$.num", as_name="num"),
    )
    json_definition = IndexDefinition(
        prefix=["king:"], index_type=IndexType.JSON)
    client.ft().create_index(schema, definition=json_definition)

    kings = (
        ("king:1", {"name": "henry", "num": 42}),
        ("king:2", {"name": "james", "num": 3.14}),
    )
    for key, value in kings:
        client.json().set(key, Path.rootPath(), value)

    # (query by alias, expected document id, expected raw JSON payload)
    expectations = (
        ("@name:henry", "king:1", '{"name":"henry","num":42}'),
        ("@num:[0 10]", "king:2", '{"name":"james","num":3.14}'),
    )
    for query, expected_id, expected_json in expectations:
        res = client.ft().search(query)
        assert res.docs[0].id == expected_id
        assert res.docs[0].json == expected_json
        assert res.total == 1

    # Querying by the raw path fails because it contains special characters
    # (callers are expected to use the alias instead)
    with pytest.raises(Exception):
        client.ft().search("@$.name:henry")
Example #5
0
def test_search_return_fields(client):
    """Check ``return_field`` with an alias on a JSON index.

    Covers both a path that is part of the schema (``$.t``) and one that is
    not (``$.t2``).
    """
    document = {
        "t": "riceratops",
        "t2": "telmatosaurus",
        "n": 9072,
        "flt": 97.2,
    }
    stored = client.json().set("doc:1", Path.rootPath(), document)
    assert stored

    # create a JSON index covering only $.t and $.flt
    json_definition = IndexDefinition(index_type=IndexType.JSON)
    schema = (TextField("$.t"), NumericField("$.flt"))
    client.ft().create_index(schema, definition=json_definition)
    waitForIndex(client, "idx")

    # (JSON path to return under the alias "txt", expected value)
    cases = (
        ("$.t", "riceratops"),
        ("$.t2", "telmatosaurus"),
    )
    for json_path, expected_text in cases:
        query = Query("*").return_field(json_path, as_field="txt")
        docs = client.ft().search(query).docs
        assert 1 == len(docs)
        assert "doc:1" == docs[0].id
        assert expected_text == docs[0].txt
Example #6
0
def test_aggregations_apply(client):
    """Check that APPLY can overwrite a field with a computed expression."""
    schema = (
        TextField("PrimaryKey", sortable=True),
        NumericField("CreatedDateTimeUTC", sortable=True),
    )
    client.ft().create_index(schema)

    hashes = (
        ("doc1", {"PrimaryKey": "9::362330",
                  "CreatedDateTimeUTC": "637387878524969984"}),
        ("doc2", {"PrimaryKey": "9::362329",
                  "CreatedDateTimeUTC": "637387875859270016"}),
    )
    for key, mapping in hashes:
        client.ft().client.hset(key, mapping=mapping)

    request = aggregations.AggregateRequest("*")
    request = request.apply(CreatedDateTimeUTC="@CreatedDateTimeUTC * 10")
    res = client.ft().aggregate(request)

    # expected values, in row order, of the timestamp multiplied by ten
    expected_values = ("6373878785249699840", "6373878758592700416")
    for position, value in enumerate(expected_values):
        assert res.rows[position] == ["CreatedDateTimeUTC", value]
Example #7
0
def import_brewery_geo(r, rsclient):
    """Create the brewery search index and populate it from the geo CSV.

    For every data row of ``brewerygeofile`` the brewery hash
    ``"<brewery>:<row[1]>"`` is read from Redis and, if present, added to the
    index as document ``"brewery:<row[1]>"``.  Rows whose hash is missing, or
    whose document cannot be added, are reported on stdout and skipped.

    Args:
        r: Redis client used to read the brewery hashes; replies are expected
            as bytes (the hash is indexed with bytes keys and decoded).
        rsclient: Search client exposing ``create_index`` and
            ``add_document``.
    """
    # create the brewery redisearch index
    rsclient.create_index([
        TextField('name', weight=5.0),
        TextField('address'),
        TextField('city'),
        TextField('state'),
        TextField('country'),
        NumericField('id', sortable=True),
        GeoField('location'),
    ])

    # newline="" lets the csv module do its own newline handling, as the csv
    # documentation requires for files passed to csv.reader
    with open(brewerygeofile, newline="") as geofile:
        geo = csv.reader(geofile)
        for row in geo:
            if geo.line_num == 1:
                # skip the header line
                continue

            # use the brewery id to generate the brewery key added earlier
            brewery_key = "{}:{}".format(brewery, row[1])

            # get all the data from the brewery hash
            binfo = r.hgetall(brewery_key)

            if not any(binfo):
                print(
                    "\tERROR: Missing info for {}, skipping geo import".format(
                        brewery_key))
                continue

            # add the brewery document to the index
            ftaddfields = {
                'name': binfo[b'name'].decode(),
                'address': binfo[b'address1'].decode(),
                'city': binfo[b'city'].decode(),
                'state': binfo[b'state'].decode(),
                'country': binfo[b'country'].decode(),
                'id': row[1],
                # NOTE(review): columns 3 and 2 are presumably longitude and
                # latitude -- confirm against the CSV header
                'location': "{},{}".format(row[3], row[2])
            }
            try:
                rsclient.add_document("brewery:{}".format(row[1]),
                                      score=1.0,
                                      replace=True,
                                      partial=True,
                                      **ftaddfields)
            except Exception as e:
                # best effort: report the failure and move on to the next row
                print("\tERROR: Failed to add document for {}: {}".format(
                    brewery_key, e))
                continue
Example #8
0
def test_sort_by(client):
    """Check ascending and descending SORTBY on a sortable numeric field."""
    schema = (TextField("txt"), NumericField("num", sortable=True))
    client.ft().create_index(schema)

    documents = (
        ("doc1", "foo bar", 1),
        ("doc2", "foo baz", 2),
        ("doc3", "foo qux", 3),
    )
    for doc_id, txt, num in documents:
        client.ft().add_document(doc_id, txt=txt, num=num)

    # Build both queries first, then run them in order
    ascending_query = Query("foo").sort_by("num", asc=True).no_content()
    descending_query = Query("foo").sort_by("num", asc=False).no_content()
    ascending = client.ft().search(ascending_query)
    descending = client.ft().search(descending_query)

    ids_by_num = ("doc1", "doc2", "doc3")

    assert 3 == ascending.total
    for position, doc_id in enumerate(ids_by_num):
        assert doc_id == ascending.docs[position].id

    # the descending result is the mirror image of the ascending one
    assert 3 == descending.total
    for position, doc_id in enumerate(ids_by_num):
        assert doc_id == descending.docs[2 - position].id
Example #9
0
async def test_filters(modclient: redis.Redis):
    """Check numeric and geo filters attached to a text query."""
    schema = (TextField("txt"), NumericField("num"), GeoField("loc"))
    await modclient.ft().create_index(schema)

    await modclient.ft().add_document(
        "doc1", txt="foo bar", num=3.141, loc="-0.441,51.458"
    )
    await modclient.ft().add_document(
        "doc2", txt="foo baz", num=2, loc="-0.1,51.2"
    )
    await waitForIndex(modclient, "idx")

    def foo_query(query_filter):
        # "foo" matches both documents; the filter narrows the result
        return Query("foo").add_filter(query_filter).no_content()

    # Test numerical filter
    up_to_two = foo_query(NumericFilter("num", 0, 2))
    above_two = foo_query(
        NumericFilter("num", 2, NumericFilter.INF, minExclusive=True)
    )
    low = await modclient.ft().search(up_to_two)
    high = await modclient.ft().search(above_two)

    assert 1 == low.total
    assert 1 == high.total
    assert "doc2" == low.docs[0].id
    assert "doc1" == high.docs[0].id

    # Test geo filter: same centre point, radius 10 versus radius 100
    small_radius = foo_query(GeoFilter("loc", -0.44, 51.45, 10))
    large_radius = foo_query(GeoFilter("loc", -0.44, 51.45, 100))
    near = await modclient.ft().search(small_radius)
    far = await modclient.ft().search(large_radius)

    assert 1 == near.total
    assert 2 == far.total
    assert "doc1" == near.docs[0].id

    # Sort results, after RDB reload order may change
    found_ids = sorted([far.docs[0].id, far.docs[1].id])
    assert ["doc1", "doc2"] == found_ids
Example #10
0
def test_aggregations_filter(client):
    """Check FILTER in aggregations, alone and combined with SORTBY."""
    schema = (
        TextField("name", sortable=True),
        NumericField("age", sortable=True),
    )
    client.ft().create_index(schema)

    people = (("doc1", "bar", "25"), ("doc2", "foo", "19"))
    for key, name, age in people:
        client.ft().client.hset(key, mapping={"name": name, "age": age})

    # compound filter: only "foo" is both named foo and younger than 20
    compound = aggregations.AggregateRequest("*")
    compound = compound.filter("@name=='foo' && @age < 20")
    res = client.ft().aggregate(compound)
    assert len(res.rows) == 1
    assert res.rows[0] == ["name", "foo", "age", "19"]

    # both documents pass the age filter; rows come back sorted by age
    by_age = aggregations.AggregateRequest("*")
    by_age = by_age.filter("@age > 15").sort_by("@age")
    res = client.ft().aggregate(by_age)
    assert len(res.rows) == 2
    for position, age in enumerate(("19", "25")):
        assert res.rows[position] == ["age", age]
Example #11
0
def test_fields_as_name(client):
    """Check that JSON fields can be returned through their ``as_name``."""
    # create index
    schema = (
        TextField("$.name", sortable=True, as_name="name"),
        NumericField("$.age", as_name="just_a_number"),
    )
    json_definition = IndexDefinition(index_type=IndexType.JSON)
    client.ft().create_index(schema, definition=json_definition)

    # insert json data
    person = {"name": "Jon", "age": 25}
    stored = client.json().set("doc:1", Path.rootPath(), person)
    assert stored

    # request both fields by alias
    query = Query("Jon").return_fields("name", "just_a_number")
    docs = client.ft().search(query).docs
    assert 1 == len(docs)
    assert "doc:1" == docs[0].id
    assert "Jon" == docs[0].name
    # the numeric value comes back as a string
    assert "25" == docs[0].just_a_number
Example #12
0
def test_aggregations_groupby(client):
    """Check each GROUPBY reducer against three documents sharing a parent."""
    # Creating the index definition and schema
    schema = (
        NumericField("random_num"),
        TextField("title"),
        TextField("body"),
        TextField("parent"),
    )
    client.ft().create_index(schema)

    # Indexing the documents; they all share parent "redis"
    documents = (
        ("search", {
            "title": "RediSearch",
            "body": "Redisearch impements a search engine on top of redis",
            "parent": "redis",
            "random_num": 10,
        }),
        ("ai", {
            "title": "RedisAI",
            "body": "RedisAI executes Deep Learning/Machine Learning models and managing their data.",  # noqa
            "parent": "redis",
            "random_num": 3,
        }),
        ("json", {
            "title": "RedisJson",
            "body": "RedisJSON implements ECMA-404 The JSON Data Interchange Standard as a native data type.",  # noqa
            "parent": "redis",
            "random_num": 8,
        }),
    )
    for doc_id, fields in documents:
        client.ft().add_document(doc_id, **fields)

    def first_group_row(reducer):
        # group the "redis" query results by @parent with a single reducer
        # and return the first row of the aggregation
        request = aggregations.AggregateRequest("redis").group_by(
            "@parent",
            reducer,
        )
        return client.ft().aggregate(request).rows[0]

    # (reducer, expected value at position 3 of the first row)
    unaliased_cases = (
        (reducers.count(), "3"),
        (reducers.count_distinct("@title"), "3"),
        (reducers.count_distinctish("@title"), "3"),
        (reducers.sum("@random_num"), "21"),  # 10+8+3
        (reducers.min("@random_num"), "3"),  # min(10,8,3)
        (reducers.max("@random_num"), "10"),  # max(10,8,3)
        (reducers.avg("@random_num"), "7"),  # (10+3+8)/3
        (reducers.stddev("random_num"), "3.60555127546"),
        (reducers.quantile("@random_num", 0.5), "8"),  # median of 3,8,10
        (reducers.tolist("@title"), ["RediSearch", "RedisAI", "RedisJson"]),
    )
    for reducer, expected in unaliased_cases:
        row = first_group_row(reducer)
        assert row[1] == "redis"
        assert row[3] == expected

    # an aliased reducer reports its alias as the column name
    row = first_group_row(reducers.first_value("@title").alias("first"))
    assert row == ["parent", "redis", "first", "RediSearch"]

    # a random sample of two titles; only membership can be checked
    row = first_group_row(reducers.random_sample("@title", 2).alias("random"))
    assert row[1] == "redis"
    assert row[2] == "random"
    assert len(row[3]) == 2
    assert row[3][0] in ["RediSearch", "RedisAI", "RedisJson"]
Example #13
0
def ftadd_beers(r, rsclient):
    """Create the beer search index and populate it from the beer CSV.

    Each data row of ``beerfile`` becomes one document keyed
    ``"<beer>:<column 0>"``.  Brewery, category and style names are looked up
    in Redis hashes from the ids found in the row.  A row whose brewery id
    (column 1) is empty is reported on stdout and not indexed.

    Args:
        r: Redis client used for the ``hget`` name lookups.
        rsclient: Search client exposing ``create_index`` and
            ``add_document``.
    """
    # create beer index
    rsclient.create_index([
        TextField('name', weight=5.0),
        TextField('brewery'),
        NumericField('breweryid', sortable=True),
        TextField('category'),
        NumericField('categoryid'),
        TextField('style'),
        NumericField('styleid'),
        TextField('description'),
        NumericField('abv', sortable=True),
        NumericField('ibu', sortable=True),
        TagField('favorite'),
    ])

    # newline="" lets the csv module do its own newline handling, as the csv
    # documentation requires for files passed to csv.reader
    with open(beerfile, newline="") as csvfile:
        beers = csv.reader(csvfile)
        for row in beers:
            if beers.line_num == 1:
                # the first line is the header; it carries no document
                continue

            docid = ''
            docscore = 1.0
            ftaddfields = {}

            for idx, field in enumerate(row):
                # idx 0 is the beer id, used to build the document key
                if idx == 0:
                    docid = "{}:{}".format(beer, field)

                # idx 1 is the brewery id; the name comes from its hash
                elif idx == 1:
                    if field == "":
                        # something is wrong with the csv, skip this line.
                        print("\tEJECTING: {}".format(row))
                        break
                    bkey = "{}:{}".format(brewery, field)
                    ftaddfields['brewery'] = r.hget(bkey, 'name')
                    ftaddfields['breweryid'] = field

                # idx 2 is beer name
                elif idx == 2:
                    ftaddfields['name'] = field

                # idx 3 is category ID (-1 means no category)
                elif idx == 3:
                    catname = 'None'
                    if int(field) != -1:
                        # get the category key and hget the name of the category
                        ckey = "{}:{}".format(category, field)
                        catname = r.hget(ckey, 'cat_name')

                    ftaddfields['category'] = catname
                    ftaddfields['categoryid'] = field

                # idx 4 is style ID (-1 means no style)
                elif idx == 4:
                    stylename = 'None'
                    if int(field) != -1:
                        skey = "{}:{}".format(style, field)
                        stylename = r.hget(skey, 'style_name')

                    ftaddfields['style'] = stylename
                    ftaddfields['styleid'] = field

                # idx 5 is ABV
                elif idx == 5:
                    ftaddfields['abv'] = field

                    # update the document score based on ABV
                    docscore = get_beer_doc_score(field)

                # idx 6 is IBU
                elif idx == 6:
                    ftaddfields['ibu'] = field
            else:
                # reached only when the row was not ejected by the break above
                # add beer document
                rsclient.add_document(docid, score=docscore, **ftaddfields)