Example #1
from redisearch import Client, TextField, Query

def test():
    # Creating a client with a given index name
    client = Client('myIndex')

    # Creating the index definition and schema
    client.drop_index()
    client.create_index([TextField('title', weight=5.0), TextField('body')])

    # Indexing a document
    client.add_document(
        'doc1',
        title='RediSearch',
        body='Redisearch implements a search engine on top of redis')

    # Simple search
    res = client.search("search engine")

    # the result has the total number of results, and a list of documents
    print(res.total)  # "1"
    print(res.docs[0])

    # Searching with snippets
    # res = client.search("search engine", snippet_sizes={'body': 50})

    # Searching with complex parameters:
    q = Query("search engine").verbatim().no_content().paging(0, 5)
    res = client.search(q)
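Note that add_document and drop_index here are the redisearch-py 0.x/1.x API. In the 2.x client the same flow is usually written with plain Redis hashes plus an IndexDefinition, the style Examples #21 and #22 below use; a minimal sketch, assuming a local Redis with the RediSearch module loaded:

# 2.x-style sketch: documents are hashes matched by the index's prefix.
from redisearch import Client, TextField, IndexDefinition, Query

client = Client('myIndex')
client.create_index([TextField('title', weight=5.0), TextField('body')],
                    definition=IndexDefinition(prefix=['doc:']))

# Any hash stored under the doc: prefix is indexed automatically.
client.redis.hset('doc:1', mapping={
    'title': 'RediSearch',
    'body': 'Redisearch implements a search engine on top of redis'})

res = client.search(Query('search engine'))
print(res.total)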
Example #2
    def clientpush(self):
        client = Client('Checkout')

        client.create_index([
            NumericField('Key'),
            TextField('UsageClass'),
            TextField('CheckoutType'),
            TextField('MaterialType'),
            NumericField('CheckoutYear'),
            NumericField('CheckoutMonth'),
            NumericField('Checkouts'),
            TextField('Title'),
            TextField('Creator'),
            TextField('Subjects'),
            TextField('Publisher'),
            TextField('PublicationYear')
        ])

        db_connection, _ = self.connect()
        cursor = db_connection.cursor()
        cursor.execute('SELECT * FROM customers')
        results = cursor.fetchall()
        i = 0
        for result in results:
            client.add_document('doc%s' % i,
                                Key=result[0],
                                UsageClass=result[1],
                                CheckoutType=result[2],
                                MaterialType=result[3],
                                CheckoutYear=result[4],
                                CheckoutMonth=result[5],
                                Checkouts=result[6],
                                Title=result[7],
                                Creator=result[8],
                                Subjects=result[9],
                                Publisher=result[10],
                                PublicationYear=result[11])
            i += 1
            print(i)
        res = client.search('BOOK')

        print("{}   {}".format(res.total, res.docs[0].Title))
        res1 = client.search("use")
        print(res1)
        q = Query('use').verbatim().no_content().paging(0, 5)
        res1 = client.search(q)
        print(res1)
        cursor.close()
        db_connection.close()
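Calling add_document once per row costs one round trip per document. redisearch-py also exposes a pipelined BatchIndexer (Example #21 below uses it via client.batch_indexer); a sketch of the same loop in that style, with chunk_size as an untuned assumption:

# Pipelined variant of the indexing loop above.
indexer = client.batch_indexer(chunk_size=1000)
for i, result in enumerate(results):
    indexer.add_document('doc%s' % i,
                         Key=result[0],
                         UsageClass=result[1],
                         CheckoutType=result[2])  # ...remaining fields as in the loop above
indexer.commit()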
Example #3
def general_search(request) -> Response:
    """
    Default full text search on all resources if no sources are specified.

    Faceted search if sources are specified.

    **query**: Query to search.
    **source**: Multiple sources can be specified.
    """

    client = Client(INDEX_NAME, conn=get_redis_connection())

    query = request.GET.get('query')
    sort_stars = request.GET.get('sort-stars')
    resources = request.GET.getlist('source')
    languages = request.GET.getlist('language')
    awesome_lists = request.GET.getlist('awesome-list')

    query = format_query(query, resources, languages, awesome_lists)
    results = client.search(Query(query))
    results = [doc.__dict__ for doc in results.docs]
    if sort_stars == "true":
        results.sort(key=lambda x: int(x['stargazers_count']), reverse=True)

    return Response({
        "docs": results
    })
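format_query is project code that is not shown here; presumably it folds the selected facets into a single RediSearch query string. A purely illustrative sketch of such a helper (field names and OR syntax are assumptions, not the project's actual implementation):

# Hypothetical format_query: field names below are illustrative.
def format_query(query, resources, languages, awesome_lists):
    parts = [query or '*']
    if resources:
        parts.append('@source:(%s)' % '|'.join(resources))
    if languages:
        parts.append('@language:(%s)' % '|'.join(languages))
    if awesome_lists:
        parts.append('@awesome_list:(%s)' % '|'.join(awesome_lists))
    return ' '.join(parts)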
Example #4
class RediSearchClient(object):
    def __init__(self, index_name):
        self.client = Client(index_name)
        self.index_name = index_name

    def build_index(self, line_doc_path, n_docs):
        line_pool = LineDocPool(line_doc_path)

        try:
            self.client.drop_index()
        except:
            pass

        self.client.create_index([TextField('title'), TextField('url'), TextField('body')])

        for i, d in enumerate(line_pool.doc_iterator()):
            self.client.add_document(i, nosave = True, title = d['doctitle'],
                    url = d['url'], body = d['body'])

            if i + 1 == n_docs:
                break

            if i % 1000 == 0:
                print "{}/{} building index".format(i, n_docs)

    def search(self, query):
        q = Query(query).paging(0, 5).verbatim()
        res = self.client.search(q)
        # print(res.total)  # "1"
        return res
Example #5
def product_search(request):
    search_key = request.POST.get('search_key', "").strip()
    if len(search_key) == 0:
        return JsonResponse({'product_detail_list': []})
    for t in [
            'tee', 't shirt', 't-shirt', 'tees', 't shirts', 't-shirts',
            'tshirts'
    ]:
        search_key = 'tshirt' if search_key == t else search_key
    client = Client('productIndex')
    q = Query(search_key)
    q.paging(0, 60)
    product_id_list = []
    try:
        res = client.search(q)
        for data in res.docs:
            product_id_list.append(data.id)
    except Exception:
        index = create_product_search_index()
        create_product_autocompleter()
        res = client.search(q)
        for data in res.docs:
            product_id_list.append(data.id)
    if len(product_id_list) == 0:
        sk = search_key.split()
        for substr in sk:
            if len(substr) > 0:
                q._query_string = substr
                res = client.search(q)
                for data in res.docs:
                    product_id_list.append(data.id)
        product_id_list = list(set(product_id_list))
    product_detail_list = product_view.cached_product_detail(product_id_list)
    context = {
        'product_detail_list': product_detail_list,
        'total_number_of_products': len(product_detail_list),
        'no_of_products': len(product_detail_list),
        'subtypes': True,
    }
    return JsonResponse(context)
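Mutating q._query_string in the fallback above reaches into Query's private state. The same fallback can be written against the public API by building a fresh Query per substring; a sketch using only names from this example:

# Same fallback, but with a new Query object per substring instead of
# rewriting the private _query_string attribute.
if len(product_id_list) == 0:
    for substr in search_key.split():
        if len(substr) > 0:
            res = client.search(Query(substr).paging(0, 60))
            for data in res.docs:
                product_id_list.append(data.id)
    product_id_list = list(set(product_id_list))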
Example #6
def magic_fb_search(request):
    if not request.user.is_authenticated():
        return JsonResponse({'product_detail_list': []})
    user = request.user
    client = Client('productIndex')
    try:
        # if the search index is not there, create it
        res = client.search('test')
    except Exception:
        index = create_product_search_index()
        create_product_autocompleter()  # up to here

    fb_likes = UserFbLikes.objects.filter(user=user)
    likes_product_map = {}

    for fb_like in fb_likes:
        try:
            res = client.search(fb_like.fb_page)
            for data in res.docs:
                likes_product_map.update({
                    str(data.id):
                    likes_product_map.get(str(data.id), []) +
                    [fb_like.fb_page]
                })
        except Exception:
            print(fb_like.fb_page)
    product_list = Product.objects.filter(
        id__in=likes_product_map.keys()).order_by('-id')
    product_detail_list = product_view.product_details(product_list)
    for i in range(len(product_detail_list)):
        product_id = str(product_detail_list[i].get('id'))
        likes = likes_product_map.get(product_id)
        product_detail_list[i].update({'fb_likes': list(set(likes))})

    context = {'product_detail_list': product_detail_list}
    return JsonResponse(context)
Example #7
def searchdb(search_content):
    global total
    client = Client("BoxGroup", port=6379)
    search_content = ' '.join(jieba.cut(search_content))
    q = Query(search_content).verbatim().paging(0, 500)
    res = client.search(q)
    total = res.total
    titlelist = []
    # iterate over the returned page of docs; res.total counts all matches,
    # which can exceed the 500 documents returned by paging(0, 500)
    for doc in res.docs:
        titlelist.append(doc.title)
    if res.total > 0:
        return titlelist
    return "No result found"
Example #8
class CacheEngine:
    def __init__(self, hostname: str, idx_name: str, port=6379) -> None:
        self._ready = False
        self._setup_client(hostname, idx_name, port)

    def _setup_client(self, hostname: str, idx_name: str, port=6379) -> None:
        try:
            self._client = Client(idx_name, host=hostname, port=port)
            self._auto_compl = AutoCompleter(idx_name, hostname, port=port)
            self._hostname = hostname
            self._port = port
            self._idx = idx_name
            self._ready = True
            LOGGER.info("Cache engine is ready")
        except:
            self._client = None
            LOGGER.error("Cache engine is faulty!")

    def add_doc(self, doc_id: str, data: dict) -> Any:
        if data is None:
            return False
        results = self._client.redis.hset(doc_id, mapping=data)
        return results

    def search(self, text_to_search: str) -> Result:
        results: Result = self._client.search(text_to_search)
        return results

    def get_doc(self, doc_id) -> Document:
        try:
            data = self._client.load_document(doc_id)
            return data
        except:
            return None

    def add_suggestion(self, suggestion) -> bool:
        results = None
        try:
            results = self._auto_compl.add_suggestions(Suggestion(suggestion))
        except:
            return False
        return True

    def get_suggestion(self, str_to_suggest: str) -> List:
        suggs = self._auto_compl.get_suggestions(str_to_suggest,
                                                 fuzzy=len(str_to_suggest) > 3)
        return suggs
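A minimal usage sketch of CacheEngine (hostname and index name are placeholders; assumes a reachable Redis with the RediSearch module):

# Placeholder host/index; exercises the main methods of the class above.
engine = CacheEngine('localhost', 'my_idx')
if engine._ready:
    engine.add_doc('doc:1', {'title': 'hello', 'body': 'world'})
    engine.add_suggestion('hello world')
    print(engine.search('hello').total)
    print([s.string for s in engine.get_suggestion('hel')])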
Example #9
    def search(cls, query, offset=0, paginate=10):
        client = Client("tower", port=6379, host=os.getenv('REDIS_HOST'))
        q = Query(query).paging(offset, paginate)
        res = client.search(q)
        result = []
        for doc in res.docs:
            value_dict = {
                'id': doc.id,
                'client_ip': doc.clientIp,
                'service': doc.service,
                'error_message': doc.errorMessage,
                'stack_trace': doc.stackTrace,
                'numberRange': doc.numberRange
            }
            result.append(value_dict)
        print(res)
        return result
Example #10
def product_search(query, limit=10, fuzzy_search=True):
    search_results = {"from_redisearch": True, "results": []}

    if not is_redisearch_enabled():
        # Redisearch module not enabled
        search_results["from_redisearch"] = False
        search_results["results"] = get_product_data(query, 0, limit)
        return search_results

    if not query:
        return search_results

    red = frappe.cache()
    query = clean_up_query(query)

    # TODO: Check perf/correctness with Suggestions & Query vs only Query
    # TODO: Use Levenshtein Distance in Query (max=3)
    ac = AutoCompleter(make_key(WEBSITE_ITEM_NAME_AUTOCOMPLETE), conn=red)
    client = Client(make_key(WEBSITE_ITEM_INDEX), conn=red)
    suggestions = ac.get_suggestions(
        query,
        num=limit,
        fuzzy=fuzzy_search
        and len(query) > 3  # Fuzzy on length < 3 can be real slow
    )

    # Build a query
    query_string = query

    for s in suggestions:
        query_string += f"|('{clean_up_query(s.string)}')"

    q = Query(query_string)

    results = client.search(q)
    search_results["results"] = list(map(convert_to_dict, results.docs))
    search_results["results"] = sorted(
        search_results["results"],
        key=lambda k: frappe.utils.cint(k["ranking"]),
        reverse=True)

    return search_results
Example #11
    def get(self, request):
        # data=request.data
        mes = {}
        search_key = request.GET.get('key')
        print(search_key)
        all_classes = Course.objects.all()
        print("开始创建索引——————————————————————————")
        # 创建一个客户端与给定索引名称
        client = Client('CII' + str(datetime.now()), host=settings.SIP, port='6666')

        # 创建索引定义和模式
        client.create_index((TextField('title'), TextField('body')))
        print('索引创建完毕————————————————————————————————')
        print('开始添加数据————————————————————————————————')

        for i in all_classes:
            print(str(i.id) + str(i.title))
            # 索引文
            client.add_document('result' + str(datetime.now()), title=i.title + '@' + str(i.id), info=i.info,
                                language='chinese')
            print(333333333)
        print('数据添加完毕————————————————————————————————')
        print(client.info())
        # 查找搜索
        client = Client('CII' + str(datetime.now()), host=settings.SIP, port='6666')

        res = client.search(search_key)
        print('查询结束————————————————————————————————————————————————')
        id_list = []
        print(res.docs)
        for i in res.docs:
            # print(i.title)  # 取出title,以@切割,取课程ID查询,然后序列化展示
            id = i.title.split('@')[1]
            id_list.append(id)
        course = Course.objects.filter(id__in=id_list).all()
        c = CourseSerializersModel(course, many=True)
        mes['course'] = c.data
        mes['code'] = 200
        mes['message'] = '搜索完毕'
        return Response(mes)
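Both Client(...) calls above embed datetime.now() in the index name, so each request builds one brand-new index, fills it, and then searches a second, different (empty) index. A sketch of the conventional pattern with a stable index name ('course_idx' is a placeholder):

# Create the index once under a fixed name; ignore the error if it
# already exists, then search the same index that was populated.
client = Client('course_idx', host=settings.SIP, port='6666')
try:
    client.create_index((TextField('title'), TextField('body')))
except Exception:
    pass  # index already exists
res = client.search(search_key)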
Example #12
class search:
    def __init__(self):
        self.redis_info = config.Config.redis_info
        self.ip, self.port, self.db = self.redis_info["host"], self.redis_info[
            "port"], self.redis_info["db"]
        index_name = self.redis_info["tb_name"]
        self.client = Client(index_name, self.ip, self.port)
        #self.rd_con = self.make_redis_connection()
        self.escape1 = re.compile(r'&#\d+;')
        self.escape2 = re.compile(
            r',|\.|<|>|{|}|[|]|"|\'|:|;|!|@|#|\$|%|\^|&|\*|\(|\)|-|\+|=|~')
        self.escape3 = re.compile(r'\s+')
        pass

    def StringEscape(self, search_str):
        search_str = re.sub(self.escape1, '', search_str)
        search_str = re.sub(self.escape2, '', search_str)
        search_str = re.sub(self.escape3, ' ', search_str)
        return search_str.strip()

    def make_redis_connection(self):
        ip, port, db = self.config.get('redis_search', 'storage').split('##')
        self.ip = ip
        self.port = port
        redis_conn = redis.StrictRedis(host=ip, port=str(port), db=str(db))
        return redis_conn

    def search_exact_Query(self, string):
        string = self.StringEscape(string)
        query = "(@look_cmp:%s*)|(@cmp_k:%s*)" % (string, string)
        res = self.client.search(Query(query).paging(0, 10000))
        arr = []
        for x in res.docs:
            arr.append({"k": x.cmp_k, "n": x.cmp_name})
        arr.sort(key=lambda x: len(x['n']))
        return [{"message": "done", "data": arr}]
Example #13
from redisearch import Client, TextField

client = Client('myIndex')
while True:
    query = input("query > ")
    if query:
        q = client.search(query)
        n_match = len(q.docs)
        print(f"found match {n_match} for keyword {query}")
        for row in q.docs:
            print(row.id, row.title, row.body)

Example #14
class TAS_AutoCompleter:
    def __init__(self,
                 host='172.16.20.7',
                 port=6382,
                 db=0,
                 autocomplete_name='Default'):
        self.client = Client(autocomplete_name, host, port)
        self.ipAdd = host
        self.ipPort = port
        self.db = db
        self.redisConn = redis.StrictRedis(host=self.ipAdd,
                                           port=self.ipPort,
                                           db=self.db)
        self.autocomplete = AutoCompleter(autocomplete_name, host, port)
        self.escape1 = re.compile(r'&#\d+;')
        self.escape2 = re.compile(
            r',|\.|<|>|{|}|[|]|"|\'|:|;|!|@|#|\$|%|\^|&|\*|\(|\)|-|\+|=|~')
        self.escape3 = re.compile(r'\s+')

    def search_using_FT(self, search_text, index):
        search_text = search_text.replace(' ', '*')
        query_string = 'FT.SEARCH ' + index + ' ' + search_text + ' LIMIT 0 100'
        res = self.redisConn.execute_command(query_string)
        fs = []
        for i, rr in enumerate(res):
            if i == 0: continue
            if i % 2 != 0: continue
            fs.append(rr)
        return fs

    def search_exact_Query_using_ft(self, index, query):
        query_string = 'FT.SEARCH ' + index + ' ' + query + ' LIMIT 0 1000'
        res = self.redisConn.execute_command(query_string)
        fs = []
        for i, rr in enumerate(res):
            if i == 0: continue
            if i % 2 != 0: continue
            fs.append(rr)
        return fs

    def StringEscape(self, search_str):
        search_str = re.sub(self.escape1, '', search_str)
        search_str = re.sub(self.escape2, '', search_str)
        search_str = re.sub(self.escape3, ' ', search_str)
        return search_str.strip()

    def simple_search(self, text):
        res = self.client.search(text)
        fs = []
        if res:
            for i, rr in enumerate(res.docs):
                fs.append([
                    rr.DOCID, rr.SECTION_TYPE, rr.GRIDID, rr.BBOX, rr.ROWCOL,
                    rr.DATA, rr.id, rr.PAGE
                ])
        return fs

    def search_exact_Query(self, query):
        return self.client.search(Query(query).paging(0, 1000))

    def search_query_convert_bk(self, query):
        res = self.search_exact_Query(query)
        fs = {}
        if res:
            for i, rr in enumerate(res.docs):
                vv = rr.DOCID + "_" + rr.PAGE + "_" + rr.GRIDID
                if vv in fs:
                    fs[vv]['count'] = fs[vv]['count'] + 1
                    fs[vv]['info'].append([
                        rr.DOCID, rr.SECTION_TYPE, rr.GRIDID, rr.BBOX,
                        rr.ROWCOL, rr.DATA, rr.id, rr.PAGE
                    ])
                else:
                    fs[vv] = {
                        'count':
                        1,
                        'info': [[
                            rr.DOCID, rr.SECTION_TYPE, rr.GRIDID, rr.BBOX,
                            rr.ROWCOL, rr.DATA, rr.id, rr.PAGE
                        ]]
                    }
                #fs.append([rr.DOCID,rr.SECTION_TYPE,rr.GRIDID,rr.BBOX,rr.ROWCOL,rr.DATA,rr.id,rr.PAGE])
        return fs

    def search_query_convert(self, query, fs):
        res = self.search_exact_Query(query)
        if res:
            for i, rr in enumerate(res.docs):
                vv = rr.PAGE + "_" + rr.GRIDID
                if vv in fs:
                    fs[vv] = fs[vv] + 1
                else:
                    fs[vv] = 1
                #fs.append([rr.DOCID,rr.SECTION_TYPE,rr.GRIDID,rr.BBOX,rr.ROWCOL,rr.DATA,rr.id,rr.PAGE])
        return fs

    def search_query_convert_result(self, query):
        res = self.search_exact_Query(query)
        fs = []
        if res:
            for i, rr in enumerate(res.docs):
                fs.append([rr.DATA, rr.PAGE, rr.BBOX])
        return fs

    def search_query_convert_result_auto(self, query, fs):
        res = self.search_exact_Query(query)
        if res:
            for i, rr in enumerate(res.docs):
                DATA, DOCID, GRIDID, PAGE, BBOX = rr.DATA, rr.DOCID, rr.GRIDID, rr.PAGE, rr.BBOX
                if not DATA: continue
                grid_id = str(DOCID) + "##" + str(PAGE) + "$" + str(GRIDID)
                if DATA not in fs:
                    fs[DATA] = {}
                if grid_id not in fs[DATA]:
                    fs[DATA][grid_id] = []
                fs[DATA][grid_id].append(BBOX)
        return fs

    def search_query_convert_result_page_grouping(self, query):
        res = self.search_exact_Query(query)
        fs = {}
        if res:
            for i, rr in enumerate(res.docs):
                #print rr
                page = rr.PAGE
                bbox = rr.BBOX
                if page not in fs:
                    fs[page] = []
                fs[page].append([rr.DATA, rr.BBOX])
        return fs

    def search_using_Query(self, search_text, index):
        search_text = search_text
        query = '@DATA:"%s"' % search_text
        #,search_text+"*")
        #query = '@BBOX:"%s"'%('109')
        res = self.client.search(Query(query).paging(0, 10000))
        fs = []
        if res:
            for i, rr in enumerate(res.docs):
                fs.append([
                    rr.DOCID, rr.SECTION_TYPE, rr.GRIDID, rr.BBOX, rr.ROWCOL,
                    rr.DATA, rr.id, rr.PAGE
                ])
        return fs
Example #15
from redisearch import Client, Query, TextField, GeoField, NumericField, GeoFilter, NumericFilter

client = Client('attractions', host='127.0.0.1', password='', port=6379)

print("Full text search for a 'ball string':")
q = Query("ball string").verbatim()
res = client.search(q)
for doc in res.docs:
    print("\t", doc.description)

print(
    "Full text search for a 'ball string' search within 300 miles of Kansas City that is verified"
)
q = Query("ball string").add_filter(
    GeoFilter('geo', -94.5786, 39.0997, 300,
              unit='mi')).add_filter(NumericFilter('verified', 1,
                                                   1)).verbatim()
res = client.search(q)
for doc in res.docs:
    print("\t", doc.description)
Example #16
from redisearch import Client, TextField, NumericField, Query

# Creating a client with a given index name
client = Client('myIndex', port=6380, host='localhost')

# Creating the index definition and schema
client.drop_index()
client.create_index([TextField('title', weight=5.0), TextField('body')])

# Indexing a document
client.add_document(
    'doc1',
    title='RediSearch',
    body='Redisearch implements a search engine on top of redis')

# Simple search
res = client.search("search engine")

# the result has the total number of results, and a list of documents
print(res.total)  # "1"
print(res.docs[0])

# Searching with snippets
#res = client.search("search engine", snippet_sizes = {'body': 50})

# Searching with complex parameters:
q = Query("search engine").verbatim().no_content().paging(0, 5)
res = client.search(q)
Example #17
class TAS_Redisearch():

    #Constructor
    def __init__(self, table_name, host="localhost", port=6381):
        try:
            self.client = Client(table_name, host, port)
            self.host = host
            self.port = port
            self.table_name = table_name
            self.redis = Redis()
            self.LIMIT = 10
        except Exception as e:
            print >> sys.stderr, "TAS_Redisearch Error inside Constructor Index:\'", table_name, "\' HOST:\'", host, "\' PORT:\'", port, "\'\n"
            print >> sys.stderr, e

    #Will set the no of results to show
    def set_result_limit(self, num):
        self.LIMIT = num
        return

    #Defines the schema for Redisearch
    def set_schema(self, schema):
        try:
            return self.client.create_index(
                schema, False, False, []
            )  #last empty list will ensure that default stopwords will not be ignored
        except Exception as e:
            print >> sys.stderr, "TAS_Redisearch Error inside set_schema Index:\'", self.table_name, "\' HOST:\'", self.host, "\' PORT:\'", self.port, "\'\n"
            print >> sys.stderr, e

    #Deletes index(table)
    def drop_index(self):
        try:
            return self.client.drop_index()
        except Exception as e:
            print >> sys.stderr, "TAS_Redisearch Error inside drop_index Index:\'", self.table_name, "\' HOST:\'", self.host, "\' PORT:\'", self.port, "\'\n"
            print >> sys.stderr, e

    #Deletes a document(row) by document_index
    def delete_document(self, document_index):
        try:
            return self.client.delete_document(document_index)
        except Exception as e:
            print >> sys.stderr, "TAS_Redisearch Error inside delete_document Index:\'", self.table_name, "\' HOST:\'", self.host, "\' PORT:\'", self.port, "\'\n"
            print >> sys.stderr, e

    #############################################SEARCHES BELOW#######################################

    #Uses python libraries
    def py_search(self, query, result_limit=-1):
        if result_limit == -1:
            result_limit = self.LIMIT
        try:
            return self.client.search(Query(query).paging(0, result_limit))
        except Exception as e:
            print >> sys.stderr, "TAS_Redisearch Error inside py_search Index:\'", self.table_name, "\' HOST:\'", self.host, "\' PORT:\'", self.port, "\'\n"
            print >> sys.stderr, e

    #Search with default parameters [will return dictionary]
    def generic_search(self, search_text, result_limit=-1):
        if result_limit == -1:
            result_limit = self.LIMIT
        query_string = "FT.SEARCH " + self.table_name + " " + search_text + " LIMIT 0 " + str(
            result_limit)
        try:
            res = self.redis.execute_command(query_string)
            return Result(res, True)
        except Exception as e:
            print >> sys.stderr, "TAS_Redisearch Error inside generic_search Index:\'", self.table_name, "\' HOST:\'", self.host, "\' PORT:\'", self.port, "\'\n"
            print >> sys.stderr, e

    def free_exact_search(self, key, result_limit=-1):
        org_key = key
        l = []
        try:
            if result_limit == -1:
                result_limit = self.LIMIT
            key = self.clean_string(key)
            returned = self.py_search("*", result_limit)
            for result in returned.docs:
                result_dict = vars(result)
                if org_key in result_dict.values():
                    l.append(result_dict)
        except Exception as e:
            print >> sys.stderr, "TAS_Redisearch Error inside value_search Index:\'", self.table_name, "\' HOST:\'", self.host, "\' PORT:\'", self.port, "\'\n"
            print >> sys.stderr, e
        return l

    #{fieldname:[value1, value2], fieldname:[value1, value2]}
    def exact_search(self, input_dict, result_limit=-1):
        formed_str = ""
        l = []
        for field, value_list in input_dict.items():
            formed_str += "@" + field + ":("
            for key in value_list:
                key = self.clean_string(key)
                formed_str += "(\'" + key + "\') | "
            formed_str = formed_str.rstrip(' |')
            formed_str += ") "
        print "PASSED: ", formed_str
        returned = self.py_search(formed_str, result_limit)
        print "RETURNED:", returned
        for result in returned.docs:
            result_dict = vars(result)
            for itr, ktr in input_dict.items():
                if result_dict[itr] in ktr:
                    l.append(result_dict)

        return l

    #Search with the passed query
    def custom_search(self, query_string):
        try:
            res = self.redis.execute_command(query_string)
            return Result(res, True)
        except Exception as e:
            print >> sys.stderr, "TAS_Redisearch Error inside custom_search Index:\'", self.table_name, "\' HOST:\'", self.host, "\' PORT:\'", self.port, "\'\n"
            print >> sys.stderr, e

    #Search in 'search_in_field' [if any of the element in 'list_to_union' is found then include it in the result
    def union_search(self, list_to_union, search_in_field):
        query_string = "FT.SEARCH " + self.table_name + " "
        union_text = "@" + search_in_field + ":("
        for text in list_to_union:
            union_text += text + "|"

        union_text = union_text.rstrip("|")
        union_text += ")"
        query_string += union_text
        try:
            res = self.redis.execute_command(query_string)
            return Result(res, True)
        except Exception as e:
            print >> sys.stderr, "TAS_Redisearch Error inside union_search Index:\'", self.table_name, "\' HOST:\'", self.host, "\' PORT:\'", self.port, "\'\n"
            print >> sys.stderr, e

    #will return all the dictionary for all the categories if no arguments are passed
    def category_taxonomy_dict(self, category='*'):
        try:
            cat_taxo_dict = {}
            total_docs = self.client.info()['num_docs']
            query_string = ""
            if category == '*':
                query_string = category
            else:
                query_string = "@CATEGORY:" + category
            result = self.py_search(query_string, total_docs)
            for single_result in result.docs:
                try:
                    category = single_result.CATEGORY
                    taxoname = single_result.TAXONAME
                except Exception as ex:
                    pass
                if category not in cat_taxo_dict:
                    cat_taxo_dict[category] = []
                if taxoname not in cat_taxo_dict[category]:
                    cat_taxo_dict[category].append(taxoname)
        except Exception as e:
            sys.stderr, "TAS_Redisearch Error inside category_taxonomy_dict Index:\'", self.table_name, "\' HOST:\'", self.host, "\' PORT:\'", self.port, "\'\n"
            print >> sys.stderr, e
        return cat_taxo_dict

    def total_record(self):
        try:
            return int(self.client.info()['num_docs'])
        except Exception as e:
            sys.stderr, "TAS_Redisearch Error inside total_records Index:\'", self.table_name, "\' HOST:\'", self.host, "\' PORT:\'", self.port, "\'\n"
            print >> sys.stderr, e

    def get_all_records(self):
        try:
            total = str(self.total_record())
            res = self.redis.execute_command("FT.SEARCH " + self.table_name +
                                             " * LIMIT 0 " + total)
            return Result(res, True)
        except Exception as e:
            sys.stderr, "TAS_Redisearch Error inside total_records Index:\'", self.table_name, "\' HOST:\'", self.host, "\' PORT:\'", self.port, "\'\n"
            print >> sys.stderr, e

    def clean_string(self, key):
        key = key.replace(',', ' ')
        key = key.replace('.', ' ')
        key = key.replace('<', ' ')
        key = key.replace('>', ' ')
        key = key.replace('{', ' ')
        key = key.replace('}', ' ')
        key = key.replace('[', ' ')
        key = key.replace(']', ' ')
        key = key.replace('"', ' ')
        key = key.replace('\'', ' ')
        key = key.replace(':', ' ')
        key = key.replace(';', ' ')
        key = key.replace('!', ' ')
        key = key.replace('@', ' ')
        key = key.replace('#', ' ')
        key = key.replace('$', ' ')
        key = key.replace('%', ' ')
        key = key.replace('^', ' ')
        key = key.replace('&', ' ')
        key = key.replace('*', ' ')
        key = key.replace('(', ' ')
        key = key.replace(')', ' ')
        key = key.replace('-', ' ')
        key = key.replace('+', ' ')
        key = key.replace('=', ' ')
        key = key.replace('~', ' ')

        return key
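The replace chain in clean_string can be collapsed into a single translation table; Example #21 below uses the same str.maketrans approach for its escaping. A Python 3 sketch (this class itself is Python 2, where string.maketrans would be the analogue):

# Python 3 sketch: map every punctuation character to a space in one pass.
_PUNCT = ',.<>{}[]"\':;!@#$%^&*()-+=~'
_TABLE = str.maketrans({c: ' ' for c in _PUNCT})

def clean_string(key):
    return key.translate(_TABLE)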
Example #18
key = 'doc:6'
r.hset( key, 'title' , 'Pirate adventures')
r.hset( key, 'body'  , 'do you dare to fight other pirates and live many adventures to get the golden chest')
r.hset( key, 'url'   , 'www.piratelife.com')
r.hset( key, 'visits', '600')

# ------------------------------------------------------------
# Full text Search with rediSearch module.
# ------------------------------------------------------------


client = Client( index_name = 'database_idx'
, host= REDIS_HOST
, port= REDIS_PORT
, conn= r
, password = REDIS_AUTH  )

# Simple search
res = client.search( "adventures" )

# the result has the total number of results, and a list of documents


print( '\n Number of found docs: {}'.format( res.total ) ) # "2"
for d in res.docs:
    print( '{}. title: {}, body: {}'.format( d.id, d.title, d.body )   ) # "RediSearch"

print( '\n\n End \n' )
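For client.search("adventures") to find those hashes, the database_idx index must already exist over them; with the 2.x client that is typically an IndexDefinition keyed on the doc: prefix. A hypothetical one-time setup matching the fields written above:

# Hypothetical setup for database_idx over the doc:* hashes (schema assumed
# from the fields used above).
from redisearch import Client, TextField, NumericField, IndexDefinition

setup = Client('database_idx', conn=r)
setup.create_index(
    [TextField('title'), TextField('body'), TextField('url'),
     NumericField('visits')],
    definition=IndexDefinition(prefix=['doc:']))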

Example #19
class Hub(object):
    dconn = None  # document store connection
    sconn = None  # search index connection
    qconn = None  # queue connection
    gh = None
    autocomplete = None
    repo = None
    _ts = None
    _hubkey = 'hub:catalog'
    _ixname = 'ix'
    _acname = 'ac'

    def __init__(self,
                 ghlogin_or_token=None,
                 docs_url=None,
                 search_url=None,
                 queue_url=None,
                 repo=None):
        timestamp = datetime.utcnow()
        logger.info('Initializing temporary hub {}'.format(timestamp))

        if ghlogin_or_token:
            self.gh = Github(ghlogin_or_token)
        elif 'GITHUB_TOKEN' in os.environ:
            self.gh = Github(os.environ['GITHUB_TOKEN'])
        else:
            logger.info("Env var 'GITHUB_TOKEN' not found")

        if docs_url:
            pass
        elif 'DOCS_REDIS_URL' in os.environ:
            docs_url = os.environ['DOCS_REDIS_URL']
        else:
            logger.critical('No Redis for document storage... bye bye.')
            raise RuntimeError('No Redis for document storage... bye bye.')
        self.dconn = ReJSONClient().from_url(docs_url)

        if search_url:
            pass
        elif 'SEARCH_REDIS_URL' in os.environ:
            search_url = os.environ['SEARCH_REDIS_URL']
        else:
            search_url = docs_url
        conn = Redis(connection_pool=ConnectionPool().from_url(search_url))
        self.sconn = RediSearchClient(self._ixname, conn=conn)
        self.autocomplete = AutoCompleter(self._acname, conn=conn)

        if queue_url:
            pass
        elif 'QUEUE_REDIS_URL' in os.environ:
            queue_url = os.environ['QUEUE_REDIS_URL']
        else:
            queue_url = docs_url
        self.qconn = StrictRedis.from_url(queue_url)

        if repo:
            pass
        elif 'REDISMODULES_REPO' in os.environ:
            repo = os.environ['REDISMODULES_REPO']
        else:
            logger.critical('No REDISMODULES_REPO... bye bye.')
            raise RuntimeError('No REDISMODULES_REPO... bye bye.')
        self.repo = repo

        # Check if hub exists
        if self.dconn.exists(self._hubkey):
            self._ts = datetime.fromtimestamp(
                float(self.dconn.jsonget(self._hubkey, Path('.created'))))
            logger.info('Latching to hub {}'.format(self._ts))
        else:
            self._ts = timestamp
            logger.info('Creating hub {}'.format(self._ts))
            self.createHub()
            self.addModulesRepo(self.repo)

    def get_repo_url(self):
        return 'https://github.com/{}'.format(self.repo)

    def createHub(self):
        logger.info('Creating the hub in the database {}'.format(self._ts))
        # Store the master modules catalog as an object
        self.dconn.jsonset(
            self._hubkey, Path.rootPath(), {
                'created': str(_toepoch(self._ts)),
                'modules': {},
                'submissions': [],
                'submit_enabled': False
            })

        # Create a RediSearch index for the modules
        # TODO: catch errors
        self.sconn.create_index(
            (TextField('name', sortable=True), TextField('description'),
             NumericField('stargazers_count', sortable=True),
             NumericField('forks_count', sortable=True),
             NumericField('last_modified', sortable=True)),
            stopwords=stopwords)

    def deleteHub(self):
        # TODO
        pass

    def addModule(self, mod):
        logger.info('Adding module to hub {}'.format(mod['name']))
        # Store the module object as a document
        m = RedisModule(self.dconn, self.sconn, self.autocomplete, mod['name'])
        m.save(mod)

        # Add a reference to it in the master catalog
        self.dconn.jsonset(
            self._hubkey, Path('.modules["{}"]'.format(m.get_id())), {
                'id': m.get_id(),
                'key': m.get_key(),
                'created': str(_toepoch(self._ts)),
            })

        # Schedule a job to refresh repository statistics, starting from now and every hour
        s = Scheduler(connection=self.qconn)
        job = s.schedule(
            scheduled_time=datetime(1970, 1, 1),
            func=callRedisModuleUpateStats,
            args=[m.get_id()],
            interval=60 * 60,  # every hour
            repeat=None,  # indefinitely
            ttl=0,
            result_ttl=0)
        return m

    """
    Adds modules to the hub from a local directory
    TODO: deprecate asap
    """

    def addModulesPath(self, path):
        logger.info('Loading modules from local path {}'.format(path))
        # Iterate module JSON files
        for filename in os.listdir(path):
            if filename.endswith(".json"):
                with open('{}/{}'.format(path, filename)) as fp:
                    mod = json.load(fp)

                m = self.addModule(mod)

    """
    Adds modules to the hub from a Github repository
    """

    def addModulesRepo(self, name, path='/modules/'):
        # TODO: check for success
        q = Queue(connection=self.qconn)
        q.enqueue(callLoadModulesFromRepo, name, path)

    def loadModulesFromRepo(self, name, path):
        logger.info('Loading modules from Github {} {}'.format(name, path))
        # TODO: error handling, sometimes not all contents are imported?
        repo = self.gh.get_repo(name)
        files = repo.get_dir_contents(path)
        for f in files:
            mod = json.loads(f.decoded_content)
            m = self.addModule(mod)

    """
    Submits a module to the hub
    """

    def submitModule(self, repo_id, **kwargs):
        logger.info('Module submitted to hub {}'.format(repo_id))
        repo_id = repo_id.lower()
        ts = datetime.utcnow()
        res = {'id': repo_id, 'status': 'failed'}

        if not self.dconn.jsonget(self._hubkey, Path('submit_enabled')):
            res['message'] = 'Module submission is currently disabled'
            return res

        # Check if the module is already listed
        m = RedisModule(self.dconn, self.sconn, self.autocomplete, repo_id)
        if m.exists:
            # TODO: return in search results
            res['message'] = 'Module already listed in the hub'
            return res

        # Check if there's an active submission, or if the failure was too recent
        submission = Submission(self.dconn, repo_id)
        if submission.exists:
            status = submission.status
            if status != 'failed':
                res['status'] = 'active'
                res['message'] = 'Active submission found for module'
                return res
            else:
                # TODO: handle failed submissions
                res['message'] = 'Module already submitted to the hub and had failed, please reset manually for now'
                return res

        # Store the new submission
        submission.save(**kwargs)

        # Record the submission in the catalog
        # TODO: find a good use for that, e.g. 5 last submissions
        self.dconn.jsonarrappend(self._hubkey, Path('.submissions'), {
            'id': submission.get_id(),
            'created': submission.created,
        })

        # Add a job to process the submission
        q = Queue(connection=self.qconn)
        job = q.enqueue(callProcessSubmission, submission.get_id())
        if job is None:
            res['message'] = 'Submission job could not be created'
            # TODO: design retry path
            logger.error(
                'Could not create submission processing job for {}'.format(
                    submission.get_id()))
        else:
            res['status'] = 'queued'
            submission.status = res['status']
            submission.job = job.id

        return res

    def viewSubmissionStatus(self, repo_id):
        submission = Submission(self.dconn, repo_id)
        if submission.exists:
            res = {
                'id': submission.get_id(),
                'status': submission.status,
                'message': submission.message,
            }
            if 'finished' == res['status']:
                res['pull_number'] = submission.pull_number
                res['pull_url'] = submission.pull_url
            return res

    def processSubmission(self, repo_id):
        logger.info('Processing submission for {}'.format(repo_id))
        submission = Submission(self.dconn, repo_id)
        if submission.exists:
            return submission.process(self.gh, self.repo)

    def viewModules(self, query=None, sort=None):
        if not query:
            # Use a purely negative query to get all modules
            query = '-etaoinshrdlu'
        q = Query(query).no_content().paging(0, 1000)
        if sort:
            if sort == 'relevance':
                pass
            elif sort == 'update':
                q.sort_by('last_modified')
            elif sort == 'stars':
                q.sort_by('stargazers_count', asc=False)
            elif sort == 'forks':
                q.sort_by('forks_count', asc=False)
            elif sort == 'name':
                q.sort_by('name')

        results = self.sconn.search(q)
        mods = []
        fetch_duration = 0
        # TODO: this should be pipelined
        for doc in results.docs:
            m = RedisModule(self.dconn, self.sconn, self.autocomplete, doc.id)
            res, duration = _durationms(m.to_dict)
            mods.append(res)
            fetch_duration += duration

        return {
            'results': results.total,
            'search_duration': '{:.3f}'.format(results.duration),
            'fetch_duration': '{:.3f}'.format(fetch_duration),
            'total_duration':
            '{:.3f}'.format(fetch_duration + results.duration),
            'modules': mods,
        }

    def viewSearchSuggestions(self, prefix):
        suggestions = self.autocomplete.get_suggestions(prefix)
        return [s.string for s in suggestions]
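The '-etaoinshrdlu' trick above matches every document by negating a token that should never occur. RediSearch also accepts '*' as an explicit match-all query (Example #17's py_search('*') relies on this), which states the intent directly; a one-line sketch:

# Match-all query without the negated nonsense token.
q = Query('*').no_content().paging(0, 1000)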
Example #20
class TAS_AutoCompleter:
    def __init__(self, host=ip, port=port, db=db, autocomplete_name='Default'):
        self.client = Client(autocomplete_name, host, port)
        self.ipAdd = host
        self.ipPort = port
        self.db = db
        self.redisConn = redis.StrictRedis(host=self.ipAdd, port=self.ipPort, db=self.db)
        self.autocomplete = AutoCompleter(autocomplete_name, host, port)
        self.escape1 = re.compile(r'&#\d+;')
        self.escape2 = re.compile(r',|\.|<|>|{|}|[|]|"|\'|:|;|!|@|#|\$|%|\^|&|\*|\(|\)|-|\+|=|~')
        self.escape3 = re.compile(r'\s+')
        #self.redisConn.execute_command('SET',*['Start','Here Started'])

    def search_using_FT(self, search_text, index):
        search_text = search_text.replace(' ', '*')
        query_string = 'FT.SEARCH ' + index + ' ' + search_text + ' LIMIT 0 100'
        res = self.redisConn.execute_command(query_string)
        # raw FT.SEARCH replies alternate doc ids and field lists; keep the ids
        fs = []
        for i, rr in enumerate(res):
            if i == 0: continue
            if i % 2 != 0: continue
            fs.append(rr)
        return fs

    def search_exact_Query_using_ft(self, index, query):
        query_string = 'FT.SEARCH ' + index + ' ' + query + ' LIMIT 0 1000'
        res = self.redisConn.execute_command(query_string)
        fs = []
        for i, rr in enumerate(res):
            if i == 0: continue
            if i % 2 != 0: continue
            fs.append(rr)
        return fs

    def StringEscape(self, search_str):
        search_str = re.sub(self.escape1, '', search_str)
        search_str = re.sub(self.escape2, '', search_str)
        search_str = re.sub(self.escape3, ' ', search_str)
        return search_str.strip()

    def simple_search(self, text):
        res = self.client.search(text)
        fs = []
        if res:
            for i, rr in enumerate(res.docs):
                fs.append([rr.DOCID, rr.SECTION_TYPE, rr.GRIDID, rr.BBOX, rr.ROWCOL, rr.DATA, rr.id, rr.PAGE])
        return fs

    def search_exact_Query(self, query):
        return self.client.search(Query(query).paging(0, 10000))

    def search_query_convert_bk(self, query):
        res = self.search_exact_Query(query)
        fs = {}
        if res:
            for i, rr in enumerate(res.docs):
                vv = rr.DOCID + "_" + rr.PAGE + "_" + rr.GRIDID
                if vv in fs:
                    fs[vv]['count'] = fs[vv]['count'] + 1
                    fs[vv]['info'].append([rr.DOCID, rr.SECTION_TYPE, rr.GRIDID, rr.BBOX, rr.ROWCOL, rr.DATA, rr.id, rr.PAGE])
                else:
                    fs[vv] = {'count': 1, 'info': [[rr.DOCID, rr.SECTION_TYPE, rr.GRIDID, rr.BBOX, rr.ROWCOL, rr.DATA, rr.id, rr.PAGE]]}
        return fs

    def search_query_convert_gridandpage(self, query, fs):
        res = self.search_exact_Query(query)
        if res:
            for i, rr in enumerate(res.docs):
                vv = rr.PAGE + "_" + rr.GRIDID
                if vv in fs:
                    fs[vv].append(rr.BBOX)
                else:
                    fs[vv] = [rr.BBOX]
        return fs

    def search_query_convert(self, query, fs):
        res = self.search_exact_Query(query)
        if res:
            for i, rr in enumerate(res.docs):
                vv = rr.PAGE + "_" + rr.GRIDID
                if vv in fs:
                    fs[vv] = fs[vv] + 1
                else:
                    fs[vv] = 1
        return fs

    def get_header(self, query):
        res = self.search_exact_Query(query)
        text = ''
        if res:
            for i, rr in enumerate(res.docs):
                text = text + " " + rr.DATA
        return text

    def get_header_all_FT(self, query, index):
        # note: .docs is only available on Client.search results, not on the
        # raw FT.SEARCH replies used by the *_using_ft helpers above
        res = self.search_exact_Query(query)
        text = []
        if res:
            for i, rr in enumerate(res.docs):
                print rr
                text.append({'txt': rr.DATA, 'rc': rr.ROWCOL})
        return text

    def get_header_all(self, query):
        res = self.search_exact_Query(query)
        text = []
        if res:
            for i, rr in enumerate(res.docs):
                print rr
                text.append({'txt': rr.DATA, 'rc': rr.ROWCOL, 'rowspan': rr.Rowspan, 'colspan': rr.Colspan})
        return text

    def search_query_convert_docs_wise(self, query, fs):
        res = self.search_exact_Query(query)
        if res:
            for i, rr in enumerate(res.docs):
                vv = rr.PAGE + "_" + rr.GRIDID
                if vv in fs:
                    fs[vv] = fs[vv] + 1
                else:
                    fs[vv] = 1
        return fs

    def search_query_convert_docs_wise_v1test(self, query, gfs, doc_id, cnt):
        #vres_doc = doc_id
        res = self.search_exact_Query(query)
        return res  # early return left in from testing; the code below never runs
        if res:
            fs = {}
            for i, rr in enumerate(res.docs):
                vv = rr.PAGE + "_" + rr.GRIDID
                if vv in fs:
                    fs[vv] = fs[vv] + 1
                else:
                    fs[vv] = 1
            if fs:
                gfs.setdefault(doc_id, {})
                for vv, c in fs.items():
                    gfs[doc_id][vv] = gfs[doc_id].get(vv, 0) + (c * cnt)
        return gfs

    def search_query_convert_docs_wise_v1(self, query, gfs, doc_id, cnt):
        res = self.search_exact_Query(query)
        if res:
            fs = {}
            for i, rr in enumerate(res.docs):
                vv = rr.PAGE + "_" + rr.GRIDID
                if vv in fs:
                    fs[vv] = fs[vv] + 1
                else:
                    fs[vv] = 1
            if fs:
                gfs.setdefault(doc_id, {})
                for vv, c in fs.items():
                    gfs[doc_id][vv] = gfs[doc_id].get(vv, 0) + (c * cnt)
        return gfs

    def search_query_convert_docs_wise_v2_order(self, query, fs, doc_id):
        res = self.search_exact_Query(query)
        if res:
            for i, rr in enumerate(res.docs):
                vv = rr.DOCID + "_" + rr.PAGE + "_" + rr.GRIDID
                if vv not in fs:
                    fs[vv] = len(fs.keys())
        return fs

    def search_query_convert_docs_wise_v2(self, query, fs, doc_id):
        res = self.search_exact_Query(query)
        if res:
            for i, rr in enumerate(res.docs):
                vv = rr.DOCID + "_" + rr.PAGE + "_" + rr.GRIDID
                if vv not in fs:
                    fs[vv] = []
                fs[vv].append([rr.ROWCOL, rr.BBOX, query, rr.SECTION_TYPE])
        return fs

    def search_query_convert_docs_wise_v2_mquery(self, query, fs, doc_id, query_wise_res):
        res = self.search_exact_Query(query)
        if res:
            query_wise_res.setdefault(query, [])
            for i, rr in enumerate(res.docs):
                vv = rr.DOCID + "_" + rr.PAGE + "_" + rr.GRIDID
                if vv not in fs:
                    fs[vv] = []
                fs[vv].append([rr.ROWCOL, rr.BBOX, query, rr.SECTION_TYPE])
                query_wise_res[query].append(vv)
        return fs

    def search_query_convert_testing(self, query, fs):
        res = self.search_exact_Query(query)
        if res:
            for i, rr in enumerate(res.docs):
                print [query, rr]
                vv = rr.PAGE + "_" + rr.GRIDID
                if vv in fs:
                    fs[vv] = fs[vv] + 1
                else:
                    fs[vv] = 1
        return fs

    def search_query_convert_result(self, query):
        res = self.search_exact_Query(query)
        fs = []
        if res:
            for i, rr in enumerate(res.docs):
                fs.append([rr.DOCID, rr.SECTION_TYPE, rr.GRIDID, rr.BBOX, rr.ROWCOL, rr.DATA, rr.id, rr.PAGE])
        return fs

    def search_using_Query(self, search_text, index):
        query = '@DATA:"%s"' % search_text
        #query = '@BBOX:"%s"' % ('109')
        res = self.client.search(Query(query).paging(0, 10000))
        fs = []
        if res:
            for i, rr in enumerate(res.docs):
                fs.append([rr.DOCID, rr.SECTION_TYPE, rr.GRIDID, rr.BBOX, rr.ROWCOL, rr.DATA, rr.id, rr.PAGE])
        return fs
Example #21
class UserCache:
    def __init__(self):
        self.client = Client("api_user_index", app.config["REDIS_HOST"],
                             app.config["REDIS_PORT"])

    def create_user_index(self, users):
        """
        Creates a new user index if not exists
        :param users:
        :return:
        """
        definition = IndexDefinition(prefix=['doc:', 'user:'])

        try:
            self.client.create_index(
                (TextField("first_name"), TextField("last_name"),
                 TextField("email"), NumericField("age"),
                 NumericField("is_employee"),
                 NumericField("user_id", sortable=True)),
                definition=definition)
        except redis.exceptions.ResponseError:
            return False

        indexer = self.client.batch_indexer(chunk_size=len(users))

        for user in users:
            fields = {
                "first_name":
                user.first_name.translate(str.maketrans({"-": r"\-"})),
                "last_name":
                user.last_name.translate(str.maketrans({"-": r"\-"})),
                "email":
                user.email.translate(str.maketrans({"-": r"\-"})),
                "age":
                user.age,
                "user_id":
                user.id,
                "is_employee":
                int(user.is_employee),
            }
            indexer.add_document(f"doc:{user.id}", **fields)
        indexer.commit()

        return True

    def cache_single_user(self, user):
        """
        Caches a single user
        :param user:
        :return:
        """
        self.client.redis.hset(
            f"doc:{user.id}",
            mapping={
                "first_name":
                user.first_name.translate(str.maketrans({"-": r"\-"})),
                "last_name":
                user.last_name.translate(str.maketrans({"-": r"\-"})),
                "email":
                user.email.translate(str.maketrans({"-": r"\-"})),
                "age":
                user.age,
                "user_id":
                user.id,
                "is_employee":
                int(user.is_employee),
            })

        return True

    def search(self, filters, page, per_page):
        """
        Searches through redis
        :return:
        """
        q = Query(self.build_query(filters)).paging(
            (page - 1) * per_page, per_page).sort_by("user_id")

        return self.client.search(q)

    def build_query(self, filters):
        query = []
        age = "+@age:[minAge maxAge]"

        for filter_name, value in filters.items():
            # Ugly non-solid way
            if value is not None:
                if filter_name == "firstName" and len(value) > 1:
                    query.append(f"+@first_name:{value}*")
                if filter_name == "lastName" and len(value) > 1:
                    query.append(f"+@last_name:{value}*")
                if filter_name == "email" and len(value) > 1:
                    query.append(f"+@email:{value}*")
                if filter_name == "minAge":
                    age = age.replace("minAge", str(value))
                if filter_name == "maxAge":
                    age = age.replace("maxAge", str(value))
                if filter_name == "isEmployee":
                    query.append(f"+@is_employee:{int(value)}")

        age = age.replace("minAge", "0")
        age = age.replace("maxAge", "100")

        query.append(age)

        return " ".join(query)
Example #22
class BaseDocument(object):
    is_redis: bool = True
    query: str = "*"  # the default search string for this document

    class Definition(BaseDefinition):
        # definition template for this document
        pass

    def __init__(self, db, prefix: str = None):
        """
            # rBaseDocument
            A RediSearch document but without input validation

            ## Param
            conn - Redis connection
            prefix - name of the document, e.g. PERSONA, or None to use the class name

            ## Remarks
            After the index is first created, its definition is no longer synced with
            the database. You must maintain changes on Redis manually, or simply delete the
            index with:

            ```> FT.DROPINDEX idx:movie```

            and let the class recreate it. This is usually fast, but may not be an option in a production environment.
        """
        self.db = db
        if not prefix:
            prefix = type(self).__name__.upper()
        self.prefix = prefix.upper()
        self.idx = Client(f"idx{self.db.delim}{self.prefix}", conn=db.r)

        # build index list for RediSearch and columns for an html table of the data
        index = []
        self.columns = []  # list of columns to appear in an auto-generated html table
        self.dependant = []  # fields that depends of a foreign key
        self.index = []  # list of index field names
        self.uniques = []  # list of fields that must be uniques
        logger.debug(f"Members of document type {self.prefix}")
        for field in self.Definition():
            logger.debug(f"{field.name}({field.type}): {field.render_kw}")
            if field.render_kw:
                # include field in index
                if field.render_kw.get('indexed', False):
                    self.index.append(
                        field.name)  # append to index field names list
                    if field.type in ('DecimalField', 'FloatField',
                                      'IntegerField'):
                        index.append(NumericField(field.name, sortable=True))
                    else:
                        index.append(TextField(field.name, sortable=True))
                # include field in html table columns
                if field.render_kw.get('on_table', False):
                    self.columns.append(field.name)
                # the field has unique values
                if field.render_kw.get('unique', False):
                    self.uniques.append(field.name)  # append to uniques
                    if not field.name in self.index:  # append to index list
                        self.index.append(field.name)
                        if field.type in ('DecimalField', 'FloatField',
                                          'IntegerField'):
                            index.append(
                                NumericField(field.name, sortable=True))
                        else:
                            index.append(TextField(field.name, sortable=True))

        # build the index; ignore the error if the index already exists
        try:
            self.idx.create_index(
                index,
                definition=IndexDefinition(
                    prefix=[f'{self.prefix}{self.db.delim}']))
        except Exception:
            pass

    def info(self) -> None:
        s = f"{self.prefix} information"
        print(f"\n{s}\n" + '=' * len(s))
        print(
            f"Document members: {[(f.name, f.type) for f in self.Definition()]}"
        )
        print(f"Indices: {self.index}")
        print(f"Foreign keys: {self.dependant}")
        deps = []
        for a, b in self.db.dependants:
            if b.prefix == self.prefix:
                deps.append(a.prefix)
        print(f"Documents that depend on this document: {deps}")
        print(f"Unique members: {self.uniques}")
        print(f"Number of documents: {self.search('*').total}")
        print("")

    def k(self, id: str) -> str:
        """ return a complete id: name+delim+id """
        return self.sanitize(id)

    def get(self, id: str) -> DotMap:
        """ return a document or None
            ## Param
            * id - the full id
        """
        p = self.db.r.hgetall(self.sanitize(id))
        if p:
            return DotMap(self.unescape_doc(self.discover(p)))
        else:
            return None

    def validate_foreigns(self, doc: dict) -> None:
        """ Called before save.
            Check that the object has the mandatory foreign fields and that their values exist in the referenced document.

            Also check the uniqueness of the unique fields.

            ## Param
            * doc - the dict to be saved in the document

            ## Exceptions
            rFKNotExistsException, rUniqueException
        """
        for d, f in self.db.dependants:
            if d.prefix == self.prefix:
                if doc.get(f.prefix.lower()) is None:
                    raise rFKNotExistsException(
                        f"The member {f.prefix.lower()} of {self.prefix} does not exist in the document.",
                        doc)
                if not self.db.r.exists(doc.get(f.prefix.lower())):
                    raise rFKNotExistsException(
                        f"The member {d.prefix}.{f.prefix.lower()}, with value {doc.get(f.prefix.lower())}, does not exist as a foreign key of {f.prefix.upper()}",
                        doc)

        # test uniqueness
        for d in self.uniques:
            q = f"@{d}:\"{doc.get(d)}\""
            if doc.get(d) and self.search(q).total > 0:
                # print(f"testing uniqueness of {d} by searching {q}")
                raise rUniqueException(
                    f"Value {self.db.qunescape(doc.get(d))} already exists in document {self.prefix}, member {d}"
                )
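        # Hypothetical illustration: a doc {'dni': '123X'} with 'dni' declared
        # unique triggers the search @dni:"123X"; any existing hit raises
        # rUniqueException.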

    def escape_doc(self, doc: dict) -> dict:
        """ qescape all str fields """
        esc_doc = {}
        for k, v in doc.items():
            if type(v).__name__ == 'str':
                esc_doc[k] = self.db.qescape(v)
            else:
                esc_doc[k] = v
        return esc_doc

    def before_save(self, doc: dict) -> dict:
        """ Check, sanitize, etc.
            Raises an exception on error.
            ## Param
            * doc - the dict to be saved, before performing the checks

            ## Exceptions
            rBeforeSaveException
            e.g. if doc.get('field_name') is None:
                    raise rBeforeSaveException(f"field_name cannot be None")

            ## Return
            The checked, sanitized doc
        """
        # 1. check types and escape strings
        # check if all members of the doc are string, int or float
        new_doc = {}
        try:
            for k, v in doc.items():
                # print(f"type of {k} is {type(v).__name__}")
                # if it is a DotMap, only include the id or None
                t = type(v).__name__
                if t in ('DotMap', 'dict'):
                    new_doc[k] = v.get('id', None)
                elif t in ('int', 'NoneType'):
                    new_doc[k] = v
                elif t in ('str', ):
                    new_doc[k] = self.db.qescape(v)
                elif t in ('Arrow', 'datetime', 'date', 'time'):
                    new_doc[k] = str(arrow.get(v))  # normalize to iso
                else:
                    new_doc[k] = str(v)
        except Exception as ex:
            raise rTypeException(
                f"Error checking data types, only str, int or float are allowed: {ex}"
            )

        # 2. validate fks
        self.validate_foreigns(new_doc)
        return new_doc

    def sanitize(self, id: str) -> str:
        """ Sanitize an id before using it

            ## Param
            * id - the str to sanitize
            ## Exceptions
            rSaveException if the key is invalid (len == 0)
        """
        # sanitize the id -> remove non-alphanumeric characters and the delimiter from the id
        id = self.db.delim.join(
            [self.db.key_sanitize(t) for t in id.split(self.db.delim)])

        # remove any delim character after the document name
        if id.startswith(self.prefix + self.db.delim):
            id_part = ''.join([t for t in id.split(self.db.delim)[1:]])
            if len(id_part) == 0:
                raise rSaveException("Len of id cant be zero", {'id': id})
            id = f"{self.prefix}{self.db.delim}{id_part}"
        else:
            # prefix the id with the document name
            id = self.db.k(self.prefix, id)
        return id.upper()
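
    # Illustrative (hypothetical) behaviour of sanitize, assuming delim ':' and
    # a key_sanitize that strips non-alphanumeric characters:
    #   sanitize('juan-23')         -> 'PERSONA:JUAN23'
    #   sanitize('PERSONA:juan-23') -> 'PERSONA:JUAN23'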

    def after_save(self, doc: dict, id: str) -> None:
        """ Do tasks after save
            ## Param 
            * doc - the saved dict
            * id  - the id of the saved doc
            ## Exceptions
            rAfterSaveException
        """
        return None

    def s(self, **doc: dict) -> str:
        """ call save with the keyword arguments as a dict """
        return self.save(doc)

    def save(self, doc: DotMap) -> str:
        """ save the dictionary and return its id """
        try:
            # if there isn't an id field, create and populate it
            if doc.get('id', None) is None:
                # counter keys always end with _KEY
                NOM_COMPTADOR = f"{self.prefix.upper()}_KEY"
                # create the counter if it does not exist
                n = self.db.r.get(NOM_COMPTADOR)
                if n is None:
                    self.db.r.set(NOM_COMPTADOR, 1)
                    n = 1
                # left-pad the id with zeros up to 8 digits
                doc['id'] = f'{n}'.rjust(8, '0')
                self.db.r.incr(NOM_COMPTADOR)

            # sanitize the id
            doc['id'] = self.sanitize(doc['id'])

            # call before_save, can raise an exception
            doc = self.before_save(doc)

            # if there is no creation field, create and populate it
            if doc.get('created_at', None) is None:
                doc['created_at'] = self.db.now()

            # always populate the updated_at field
            doc['updated_at'] = self.db.now()

            # save the dictionary as a Redis hash
            self.idx.redis.hset(doc['id'], mapping=doc)

            # call after_save
            self.after_save(doc, doc['id'])

            return doc['id']
        except Exception as ex:
            logger.error(
                f"Database error while saving doc id {doc.get('id')}: {ex}")
            raise rSaveException(ex, doc)

    def before_delete(self, id: str) -> None:
        """ Check whether we can delete this document.
            At this stage, it can be deleted only if it is not referenced as a foreign key;
            an exception is raised otherwise.
            ## Param
            * id - the complete id prefix:id
            ## Exception
            rDeleteFKException
        """
        id = self.sanitize(id)
        for d in self.db.dependants:
            # dependants is organized as tuples, e.g. (PERSONA, PAIS)
            # check whether the dependency applies to this document
            if self.prefix == d[1].prefix:  # e.g. we want to delete a country and some person depends on it
                # print(f"{d[0].prefix} depends on {self.prefix}; checking whether any doc in {d[0].prefix} holds the key {id}")
                cad = f'@{d[1].prefix.lower()}:{id}'
                # print(f"The search string in {d[0].prefix} is {cad}")
                if d[0].search(cad).total > 0:
                    raise rDeleteFKException(
                        f"Can't delete {id} of {self.prefix} because there are documents of {d[0].prefix} that hold this key.",
                        {"id": id})

    def after_delete(self, id: str) -> None:
        """ Perform some action after deletion
            ## Param
            * id - the complete id prefix:id
            ## Exceptions
            rAfterDeleteException
        """
        pass

    def delete(self, id: str) -> None:
        """ Remove a key from Redis.
            before_delete can throw an exception

            ## Param
            * id - the complete id prefix:id

            ## Exceptions
            rDeleteException
        """
        id = self.sanitize(id)
        self.before_delete(id)
        try:
            self.db.r.delete(id)
        except Exception as ex:
            raise rDeleteException(ex, {'id': id})
        self.after_delete(id)

    def unescape_doc(self, doc: dict) -> dict:
        """ qunescape all str fields """
        esc_doc = {}
        for k, v in doc.items():
            if type(v).__name__ == 'str':
                esc_doc[k] = self.db.qunescape(v)
            else:
                esc_doc[k] = v
        return esc_doc

    def discover(self, doc: dict) -> DotMap:
        """ discover first-level foreign keys and include the result in the dict """
        n = {}
        # for each member of the doc
        for k, v in doc.items():
            # if this field depends on a foreign key
            if k.upper() in self.dependant:
                # fetch the referenced document and embed it as this member's value
                n[k] = self.unescape_doc(DotMap(self.db.r.hgetall(v)))
            else:
                if type(v).__name__ == 'str':
                    n[k] = self.db.qunescape(v)
                else:
                    n[k] = v
        return DotMap(n)

    def search(self,
               query: str = "*",
               start: int = 0,
               num: int = 10,
               sort_by: str = 'id',
               direction: bool = True,
               slop=0) -> list:
        """ perform a query with the index
            ## Param
            * query - the query string
            * start - first record of the page
            * num - number of records to include in the result
            * sort_by - field to order by, default: *id*
            * direction - asc True, desc False
            * slop - number of intermediate, non-matched terms allowed between query terms, default: *0*
            ## Exception
            rSearchException
            ## Return
            The result, with the total number of hits and the list of records
        """
        try:
            q = Query(query).slop(slop).sort_by(sort_by,
                                                direction).paging(start, num)
            result = self.idx.search(q)
            if len(self.dependant) == 0:
                return result
            # discover first level foreign keys
            docs = result.docs
            if result.total > 0:
                docs_with_discover = []  # new list of docs
                # for each document
                for doc in self.db.docs_to_dict(result.docs):
                    # append to the list of new docs
                    docs_with_discover.append(self.discover(doc))
                docs = docs_with_discover
            # return the result shaped like a RediSearch result
            return DotMap(total=result.total, docs=docs)
        except Exception as ex:
            raise rSearchException(str(ex), {'query': query})
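
    # Hypothetical call, e.g. on a PERSONA document with an indexed 'name' field:
    #   res = persona.search("@name:Jane*", start=0, num=5, sort_by='name')
    #   print(res.total, [d.id for d in res.docs])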

    def paginate(self,
                 query: str = "*",
                 page: int = 1,
                 num: int = 10,
                 sort_by: str = 'id',
                 direction: bool = True,
                 slop: int = 0) -> Pagination:
        """ perform a paginated query with the index and return a Pagination object
            ## Param
            * query - the query string
            * page - page number, starting at 1
            * num - number of records per page
            * sort_by - field to order by, default: *id*
            * direction - asc True, desc False
            * slop - number of intermediate, non-matched terms allowed, default: *0*
            ## Exception
            rSearchException
        """
        try:
            tic = time.perf_counter()
            start = (page - 1) * num
            # count total of docs to calculate the total of pages
            total = self.idx.search(Query(query).slop(slop).paging(0, 0)).total
            # construct the query, paginated start and num
            q = Query(query).slop(slop).sort_by(sort_by,
                                                direction).paging(start, num)
            # perform the query
            items = self.idx.search(q).docs
            elapsed_time = time.perf_counter() - tic
            logger.debug(
                f"Pagination over {self.prefix}({query}) with {num} of {total} results done in {(elapsed_time*1000):0.3f}ms"
            )
            p = Pagination(page=page, per_page=num, total=total, items=items)
            return p
        except Exception as ex:
            raise rSearchException(str(ex), {'query': query})
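
For context, and not part of the original example: a minimal usage sketch of BaseDocument, assuming the db wrapper and BaseDefinition from the surrounding library plus WTForms-style fields (which expose the name, type and render_kw attributes the constructor iterates over). All names below are illustrative.

from wtforms import IntegerField, StringField

class Persona(BaseDocument):
    class Definition(BaseDefinition):
        # render_kw drives the behaviour: 'indexed' adds the field to the
        # RediSearch index, 'unique' enforces uniqueness on save and
        # 'on_table' includes it in the generated HTML table columns
        name = StringField('Name', render_kw={'indexed': True, 'on_table': True})
        dni = StringField('DNI', render_kw={'unique': True})
        age = IntegerField('Age', render_kw={'indexed': True})

persona = Persona(db)  # creates idx:PERSONA on first instantiation
pid = persona.s(name='Jane', dni='123X', age=30)  # autogenerated id, e.g. PERSONA:00000001
print(persona.get(pid).name)                          # 'Jane'
print(persona.search('@name:Jane', sort_by='name').total)  # 1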
Example #23
0
import time

from redis import ResponseError
from redisearch import Client, Query, TextField

client = Client('tweets')   # index name assumed; not shown in the original snippet
file = open('tweets.tsv')   # input file assumed: a tab-separated tweet dump

client.redis.flushdb()
client.create_index([TextField('tweet'), TextField('timestamp')])
start = time.time()
for x, line in enumerate(file.readlines()):
    content = line.strip().split('\t')
    try:
        if len(content) == 4:  # the line includes a timestamp
            client.add_document('-'.join(content[:2]),
                                tweet=content[-2],
                                timestamp=content[-1])
        else:
            client.add_document('-'.join(content[:2]),
                                tweet=content[-1],
                                timestamp='')
    except ResponseError:
        pass
    if x % 1000 == 0:
        print(x, 'lines indexed...')

end = time.time()
print("Indexing time elapsed", end - start)

total = 0
for i in range(30):
    start = time.time()
    res = client.search(Query("@tweet:(ok | fine)"))
    end = time.time()
    total += end - start

print("Query time elapsed", total / 30)