class SearchDemo:
    """Demo helper that (re)creates a RediSearch index for article documents."""

    def __init__(self, args):
        """Connect to RediSearch using parsed CLI args (index, host, port)."""
        self.index = args.index
        self.client = Client(self.index, host=args.host, port=args.port)

    def create(self):
        """Drop any existing index, then create the article schema."""
        try:
            self.client.drop_index()
        except Exception:
            # Best-effort: the index may not exist yet; was a bare `except:`.
            pass
        self.client.create_index([
            NumericField('WORDCOUNT', sortable=True),
            TextField('BYLINE', no_stem=True, sortable=True),
            TextField('DOCUMENTTYPE', sortable=True),
            TextField('HEADLINE', sortable=True),
            TagField('KEYWORDS', separator=';'),
            NumericField('MULTIMEDIA', sortable=True),
            TextField('NEWDESK', sortable=True),
            NumericField('PRINTPAGE', sortable=True),
            NumericField('PUBDATE', sortable=True),
            TextField('SECTIONNAME', sortable=True),
            TextField('SNIPPET', sortable=True),
            TextField('TYPEOFMATERIAL', sortable=True),
            TextField('WEBURL')
        ])
def general_search(request) -> Response:
    """
    Default full text search on all resources if no sources are specified.
    Faceted search when sources are specified.

    **query**: Query to search.

    **source**: Multiple sources can be specified.
    """
    client = Client(INDEX_NAME, conn=get_redis_connection())
    params = request.GET
    sort_stars = params.get('sort-stars')
    formatted = format_query(
        params.get('query'),
        params.getlist('source'),
        params.getlist('language'),
        params.getlist('awesome-list'),
    )
    hits = client.search(Query(formatted))
    docs = [hit.__dict__ for hit in hits.docs]
    if sort_stars == "true":
        docs.sort(key=lambda d: int(d['stargazers_count']), reverse=True)
    return Response({"docs": docs})
def __init__(self):
    """Connect to the event broker and ensure the CCTV search index exists."""
    self.r = redis.from_url(config.EVENT_BROKER_URL)
    self.client = Client('CCTV_DATA')
    try:
        self.client.create_index([TextField('CCTV_ID'), TagField('TAGS')])
    except Exception as error:
        # Usually means the index already exists (e.g. after a restart).
        # Fixed typo in the original message ("creatign").
        print("Error while creating index", error)
def index_size(index_name):
    """Return the full RediSearch INFO mapping for *index_name*.

    Connects to the host named 'redis' on the default port 6379.
    """
    search_client = Client(index_name, 'redis', 6379)
    return search_client.info()
def test():
    """Smoke-test RediSearch: build an index, add one doc, run two queries.

    Fixes the original Python-2 `print` statements (function-call form works
    on Python 3) and removes dead commented-out code.
    """
    # Creating a client with a given index name
    client = Client('myIndex')

    # Creating the index definition and schema
    client.drop_index()
    client.create_index([TextField('title', weight=5.0), TextField('body')])

    # Indexing a document
    client.add_document(
        'doc1',
        title='RediSearch',
        body='Redisearch implements a search engine on top of redis')

    # Simple search
    res = client.search("search engine")

    # the result has the total number of results, and a list of documents
    print(res.total)  # "1"
    print(res.docs[0])

    # Searching with complex parameters:
    q = Query("search engine").verbatim().no_content().paging(0, 5)
    res = client.search(q)
def main():
    """Import beer/brewery CSV data into Redis and build RediSearch indexes."""
    # Parse arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('-u', '--url', help='Redis URL', type=str,
                        default='redis://127.0.0.1:6379')
    args = parser.parse_args()

    # Set up Redis connection
    url = urlparse(args.url)
    r = redis.StrictRedis(host=url.hostname, port=url.port)
    rsbeer = Client(ftbeeridx, conn=r)
    rsbrewery = Client(ftbreweryidx, conn=r)

    # Drop pre-existing indexes; info() raising means the index is absent,
    # so skip that client (was a bare `except:` — narrowed to Exception).
    for rsclient in [rsbeer, rsbrewery]:
        try:
            cinfo = rsclient.info()
        except Exception:
            continue
        print("dropping index {}".format(cinfo['index_name']))
        rsclient.drop_index()

    print("Importing categories...")
    import_csv(r, category, catfile)
    print("Importing styles...")
    import_csv(r, style, stylefile)
    print("Importing breweries...")
    import_csv(r, brewery, breweryfile)
    print("Adding brewery geo data to RediSearch...")
    import_brewery_geo(r, rsbrewery)
    print("Adding beer data to RediSearch...")
    ftadd_beers(r, rsbeer)
    print("Done.")
def start(self, data, index_name):
    """Rebuild *index_name* with the document schema, then load *data*.

    Returns a one-element list holding the last completed stage (1-5).
    """
    status = 1
    self.drop_index()
    self.client = Client(index_name, self.host, self.port)
    status = 2
    # One numeric position column plus a fixed set of text columns.
    text_columns = ('DATA', 'SECTION_TYPE', 'DOCID', 'PAGE', 'GRIDID',
                    'ROWCOL', 'BBOX', 'PAGE_GRID_SE', 'Rowspan', 'Colspan')
    schema = [NumericField('INDEX')]
    schema.extend(TextField(name) for name in text_columns)
    status = 3
    self.add_indexing_schema(schema)
    status = 4
    self.add_data(data, index_name)
    status = 5
    return [status]
def __init__(self, urls: List[str], max_per_list: int = MAX_RES_PER_LIST):
    """Store the crawl targets plus the search client and key helper."""
    self.client = Client(
        INDEX_NAME,
        host=REDIS_HOST,
        port=REDIS_PORT,
        password=REDIS_PASSWORD,
    )
    self.keys = Keys(KEY_PREFIX)
    self.urls = urls
    self.max = max_per_list
def __init__(self, args):
    """Connect to RediSearch/Redis and cache the index's field names."""
    self.index = args.index
    self.client = Client(self.index, host=args.host, port=args.port)
    self.redis = redis.Redis(args.host, args.port)
    # info()['fields'] yields per-field descriptors; element 0 is the name.
    self.fields = [descriptor[0] for descriptor in self.client.info()['fields']]
def build_ipa_index():
    """Download the Italian PA index files and (re)build the `IPAIndex`.

    Pulls `amministrazioni.txt` and `ou.txt` from indicepa.gov.it, recreates
    the RediSearch index, loads one document per administration, then applies
    a partial update with the digital-transition office (RTD) contacts.
    """
    start_time = time.time()
    rc = redis.Redis(password=os.environ.get('REDIS_PASSWORD', ''))
    rs_client = Client('IPAIndex', conn=rc)
    print(
        'Getting file `amministrazioni.txt` from https://www.indicepa.gov.it',
        flush=True)
    ipa_index_amm_url = 'https://www.indicepa.gov.it/public-services/opendata-read-service.php?dstype=FS&filename=amministrazioni.txt'
    ipa_index_amm = pd.read_csv(ipa_index_amm_url, sep='\t', dtype=str)
    print('Getting file `ou.txt` from https://www.indicepa.gov.it', flush=True)
    ipa_index_ou_url = 'https://www.indicepa.gov.it/public-services/opendata-read-service.php?dstype=FS&filename=ou.txt'
    ipa_index_ou = pd.read_csv(ipa_index_ou_url,
                               sep='\t',
                               na_values=['da_indicare', '*****@*****.**'],
                               dtype=str)
    # Keep only the digital-transition office rows.
    ipa_index_ou = ipa_index_ou.loc[
        lambda ipa_index_ou: ipa_index_ou['cod_ou'] == 'Ufficio_Transizione_Digitale']
    try:
        rs_client.drop_index()
    except Exception:
        # Index already dropped (was a bare `except:` — narrowed).
        pass
    rs_client.create_index([
        TextField('ipa_code', weight=2.0),
        TextField('name', weight=2.0, sortable=True),
        TextField('site'),
        TextField('pec'),
        TextField('city', weight=1.4),
        TextField('county'),
        TextField('region'),
        TagField('type'),
        TextField('rtd_name'),
        TextField('rtd_pec'),
        TextField('rtd_mail'),
    ])
    print('Created index `IPAIndex`', flush=True)
    print('Feeding `IPAIndex` with data from `amministrazioni.txt`', flush=True)
    for index, row in ipa_index_amm.iterrows():
        rs_client.add_document(row['cod_amm'],
                               language='italian',
                               replace=True,
                               **get_ipa_amm_item(row))
    print('Feeding `IPAIndex` with data from `ou.txt`', flush=True)
    for index, row in ipa_index_ou.iterrows():
        # partial=True merges RTD fields into the existing document.
        rs_client.add_document(row['cod_amm'], partial=True,
                               **get_ipa_rtd_item(row))
    finish_time = time.time()
    print('`IPAIndex` build completed in {0} seconds'.format(
        round(finish_time - start_time, 2)), flush=True)
def __init__(self, host=ip, port=port, db=db, autocomplete_name='Default'):
    """Wire up the search client, autocompleter and a raw Redis connection."""
    self.ipAdd = host
    self.ipPort = port
    self.db = db
    self.client = Client(autocomplete_name, host, port)
    self.redisConn = redis.StrictRedis(host=self.ipAdd, port=self.ipPort, db=self.db)
    self.autocomplete = AutoCompleter(autocomplete_name, host, port)
    # Pre-compiled cleanup patterns: HTML numeric entities, punctuation,
    # and runs of whitespace (patterns preserved verbatim).
    self.escape1 = re.compile(r'&#\d+;')
    self.escape2 = re.compile(r',|\.|<|>|{|}|[|]|"|\'|:|;|!|@|#|\$|%|\^|&|\*|\(|\)|-|\+|=|~')
    self.escape3 = re.compile(r'\s+')
def __init__(self, table_name, host="localhost", port=6381):
    """Create a RediSearch client for *table_name*; log failures to stderr.

    Modernized the original Python-2 `print`/`print >>` statements to the
    Python 3 print function.
    """
    try:
        self.client = Client(table_name, host, port)
        self.host = host
        self.port = port
        self.table_name = table_name
        self.redis = Redis()
        self.LIMIT = 10
    except Exception as e:
        print('yyy')
        print("TAS_Redisearch Error inside Constructor Index:\'", table_name,
              "\' HOST:\'", host, "\' PORT:\'", port, "\'\n", file=sys.stderr)
        print(e, file=sys.stderr)
def __init__(self, args):
    """Capture CSV-import options and connect to the target index."""
    # Connection settings
    self.host = args.host
    self.port = args.port
    self.index = args.index
    # CSV parsing options
    self.delimiter = args.delimiter
    self.rows = args.rows
    self.hasHeader = args.header
    self.ignore = args.ignore
    self.docid = args.docid
    self.file = open(args.file, 'r')
    # Field layout is read back from the live index.
    self.client = Client(self.index, self.host, self.port)
    self.fields = self.client.info()['fields']
def _setup_client(self, hostname: str, idx_name: str, port=6379) -> None:
    """Initialise the search client and autocompleter; set the ready flag.

    On any failure the client is cleared and the engine stays not-ready
    (was a bare `except:` — narrowed to Exception).
    """
    try:
        self._client = Client(idx_name, host=hostname, port=port)
        self._auto_compl = AutoCompleter(idx_name, hostname, port=port)
        self._hostname = hostname
        self._port = port
        self._idx = idx_name
        self._ready = True
        LOGGER.info("Cache engine is ready")
    except Exception:
        self._client = None
        LOGGER.error("Cache engine is faulty!")
def __init__(self):
    """Read Redis connection settings from config and build the client."""
    self.redis_info = config.Config.redis_info
    self.ip = self.redis_info["host"]
    self.port = self.redis_info["port"]
    self.db = self.redis_info["db"]
    self.client = Client(self.redis_info["tb_name"], self.ip, self.port)
    # Cleanup patterns: HTML numeric entities, punctuation, whitespace runs
    # (patterns preserved verbatim).
    self.escape1 = re.compile(r'&#\d+;')
    self.escape2 = re.compile(
        r',|\.|<|>|{|}|[|]|"|\'|:|;|!|@|#|\$|%|\^|&|\*|\(|\)|-|\+|=|~')
    self.escape3 = re.compile(r'\s+')
def create_index(cls):
    """Create the `tower` RediSearch index and populate it.

    Bug fix: `os.getenv` returns a string, so the original
    `os.getenv("REDIS_SEARCH", False)` was truthy for ANY non-empty value,
    including "false" and "0". The flag is now parsed explicitly.
    """
    error_message = "Unable to create Index. Try Again"
    redis_enabled = os.getenv("REDIS_SEARCH", "").strip().lower() in ("1", "true", "yes")
    if redis_enabled:
        client = Client("tower", port=6379, host=os.getenv('REDIS_HOST'))
        try:
            client.create_index(document)
            cls.build_index(client)
            print("Watcher Index created successfully")
        except ResponseError as err:
            # Typically raised when the index already exists.
            print(err)
    else:
        print(error_message)
def before_request():
    """Attach a Redis connection and both RediSearch clients to flask `g`."""
    g.redis = redis.StrictRedis(
        host=app.config['REDIS_HOST'],
        port=app.config['REDIS_PORT'],
    )
    g.rsbeer = Client('beerIdx', conn=g.redis)
    g.rsbrewery = Client('breweryIdx', conn=g.redis)
def refresh_search_keys(request):
    """Rebuild the product search index and autocompleter (staff only)."""
    # Guard clause: only authenticated staff may trigger a rebuild.
    if not (request.user.is_authenticated() and request.user.is_staff):
        return JsonResponse({'success': False})

    client = Client('productIndex')
    total_old_docts = client.info()['num_docs']
    delete_status = client.drop_index()
    new_index = False
    if delete_status == 'OK':
        new_index = create_product_search_index()

    auto_completer = AutoCompleter('productAutocompleter')
    auto_completer_old_count = auto_completer.len()
    create_product_autocompleter()
    auto_completer_new_count = auto_completer.len()
    return JsonResponse({'success': True})
class RandomWikipediaImport(object):
    """Continuously pull random Wikipedia articles into a RediSearch index."""

    def __init__(self):
        """Create the `wikipedia` index with weighted title + body fields."""
        self.rs = Client('wikipedia')
        self.rs.create_index((TextField('title', weight=5.0), TextField('body')))
        print(f'>>> Created index')

    def insert_random_loop(self):
        """Fetch random articles forever, indexing each one as doc<i>."""
        i = 1
        while True:
            # Pick a random article title, then resolve the full page.
            article = wikipedia.page(wikipedia.random())
            self.rs.add_document(f'doc{i}', title=article.title, body=article.content)
            print(f'>>> Inserted {article.title}')
            i += 1
def __init__(self, args):
    """Set up optional RediSearch access plus CSV-export options."""
    self.host = args.host
    self.port = args.port
    self.index = args.index
    # Search mode is enabled only when an index name was supplied.
    self.search = self.index is not None
    if self.search:
        self.search_client = Client(self.index, self.host, self.port)
        self.info = self.search_client.info()['fields']
    self.file = open(args.file, 'r')
    self.delimiter = args.delimiter
    self.rows = args.rows
    self.ignore = args.ignore
    self.docid = args.docid
    self.client = redis.Redis(args.host, args.port)
    self.fields = []
def get_indexed_client(self):
    """Return a RediSearch client bound to the `idx:stock` index."""
    return Client(
        "idx:stock", host=self.hostname, port=self.port, password=self.password)
def searchdb(search_content):
    """Search the BoxGroup index for *search_content* (jieba-tokenised).

    Returns the matching document titles (first 500 hits) as a list, or the
    string "No result found" when nothing matches. Side effect: updates the
    module-level `total` with the raw hit count.

    Bug fix: the original indexed `res.docs[i]` while looping up to
    `res.total`, but paging caps `res.docs` at 500 entries, so any query
    with more than 500 hits raised IndexError. We now iterate `res.docs`.
    """
    global total
    client = Client("BoxGroup", port=6379)
    tokenised = ' '.join(jieba.cut(search_content))
    q = Query(tokenised).verbatim().paging(0, 500)
    res = client.search(q)
    total = res.total
    if total == 0:
        return "No result found"
    return [doc.title for doc in res.docs]
class CacheEngine:
    """Thin wrapper around RediSearch: documents, search and suggestions."""

    def __init__(self, hostname: str, idx_name: str, port=6379) -> None:
        self._ready = False
        self._setup_client(hostname, idx_name, port)

    def _setup_client(self, hostname: str, idx_name: str, port=6379) -> None:
        """Connect the search client and autocompleter; set the ready flag."""
        try:
            self._client = Client(idx_name, host=hostname, port=port)
            self._auto_compl = AutoCompleter(idx_name, hostname, port=port)
            self._hostname = hostname
            self._port = port
            self._idx = idx_name
            self._ready = True
            LOGGER.info("Cache engine is ready")
        except Exception:
            self._client = None
            LOGGER.error("Cache engine is faulty!")

    def add_doc(self, doc_id: str, data: dict) -> Any:
        """Store *data* as a Redis hash under *doc_id*; return hset's result.

        Bug fix: the original guard was `if dict is None:` — testing the
        builtin type, which is never None — so a None payload crashed in
        hset. The guard now checks the actual argument.
        """
        if data is None:
            return False
        results = self._client.redis.hset(doc_id, mapping=data)
        return results

    def search(self, text_to_search: str) -> Result:
        """Run a full-text query and return the raw Result."""
        results: Result = self._client.search(text_to_search)
        return results

    def get_doc(self, doc_id) -> Document:
        """Load a document by id, or None when it cannot be fetched."""
        try:
            return self._client.load_document(doc_id)
        except Exception:
            return None

    def add_suggestion(self, suggestion) -> bool:
        """Add an autocomplete suggestion; return True on success."""
        try:
            self._auto_compl.add_suggestions(Suggestion(suggestion))
        except Exception:
            return False
        return True

    def get_suggestion(self, str_to_suggest: str) -> List:
        """Return autocomplete suggestions (fuzzy for strings longer than 3)."""
        return self._auto_compl.get_suggestions(
            str_to_suggest, fuzzy=len(str_to_suggest) > 3)
def search(cls, query, offset=0, paginate=10):
    """Query the `tower` index and return a list of plain result dicts."""
    client = Client("tower", port=6379, host=os.getenv('REDIS_HOST'))
    res = client.search(Query(query).paging(offset, paginate))
    result = [
        {
            'id': doc.id,
            'client_ip': doc.clientIp,
            'service': doc.service,
            'error_message': doc.errorMessage,
            'stack_trace': doc.stackTrace,
            'numberRange': doc.numberRange,
        }
        for doc in res.docs
    ]
    print(res)
    return result
class RediSearchClient(object):
    """Build and query a RediSearch index from a line-document file."""

    def __init__(self, index_name):
        self.client = Client(index_name)
        self.index_name = index_name

    def build_index(self, line_doc_path, n_docs):
        """(Re)create the index and load up to *n_docs* documents.

        Fixed the original Python-2 `print` statement and narrowed the
        bare `except:` around drop_index.
        """
        line_pool = LineDocPool(line_doc_path)

        try:
            self.client.drop_index()
        except Exception:
            # The index may not exist yet on the first run.
            pass

        self.client.create_index(
            [TextField('title'), TextField('url'), TextField('body')])

        for i, d in enumerate(line_pool.doc_iterator()):
            self.client.add_document(i, nosave=True, title=d['doctitle'],
                                     url=d['url'], body=d['body'])

            if i + 1 == n_docs:
                break

            if i % 1000 == 0:
                print("{}/{} building index".format(i, n_docs))

    def search(self, query):
        """Run a verbatim query and return the first 5 results."""
        q = Query(query).paging(0, 5).verbatim()
        res = self.client.search(q)
        return res
def start(self, data, doc_id, company, project):
    """Drop and rebuild the `<project>_DOCUMENT_<doc_id>` index, load data.

    Returns a one-element list holding the last completed stage (1-5).
    """
    status = 1
    self.drop_index()
    index_name = project + "_DOCUMENT_" + str(doc_id)
    self.client = Client(index_name, self.host, self.port)
    status = 2
    schema = [NumericField('INDEX')]
    schema.extend(TextField(name) for name in ('DATA', 'PAGE', 'BBOX'))
    status = 3
    self.add_indexing_schema(schema)
    status = 4
    self.add_data(data, company, doc_id, project)
    status = 5
    return [status]
def clientpush(self):
    """Create the Checkout index and mirror the `customers` table into it."""
    client = Client('Checkout')

    # Schema: four numeric columns, the rest full-text.
    numeric_cols = {'Key', 'CheckoutYear', 'CheckoutMonth', 'Checkouts'}
    columns = ['Key', 'UsageClass', 'CheckoutType', 'MaterialType',
               'CheckoutYear', 'CheckoutMonth', 'Checkouts', 'Title',
               'Creator', 'Subjects', 'Publisher', 'PublicationYear']
    client.create_index([
        NumericField(col) if col in numeric_cols else TextField(col)
        for col in columns
    ])

    db_connection, _ = self.connect()
    cursor = db_connection.cursor()
    cursor.execute('SELECT * FROM customers')
    results = cursor.fetchall()

    for i, row in enumerate(results):
        client.add_document('doc%s' % i,
                            Key=row[0], UsageClass=row[1], CheckoutType=row[2],
                            MaterialType=row[3], CheckoutYear=row[4],
                            CheckoutMonth=row[5], Checkouts=row[6],
                            Title=row[7], Creator=row[8], Subjects=row[9],
                            Publisher=row[10], PublicationYear=row[11])
        # Progress counter (1-based, as in the original).
        print(i + 1)

    res = client.search('BOOK')
    print("{} {}".format(res.total, res.docs[0].Title))

    res1 = client.search("use")
    print(res1)

    q = Query('use').verbatim().no_content().paging(0, 5)
    res1 = client.search(q)
    print(res1)

    cursor.close()
    db_connection.close()
class EventProcessor():
    """Consume CCTV events: store geo positions and index detected objects.

    Fixes typos in the original runtime messages ("creatign", "and and",
    "ccty") and builds the random tag list with a comprehension.
    """

    def __init__(self):
        self.r = redis.from_url(config.EVENT_BROKER_URL)
        self.client = Client('CCTV_DATA')
        try:
            self.client.create_index([TextField('CCTV_ID'), TagField('TAGS')])
        except Exception as error:
            # Usually means the index already exists after a restart.
            print("Error while creating index", error)

    def get_objects_in_image(self, image):
        """Return four random object labels (duplicates possible).

        Placeholder for a real RedisAI model call; *image* is ignored.
        """
        # TODO: call RedisAI module
        objects = [
            "key", "passport", "wallet", "car", "bag", "watch", "book",
            "satchel", "laptop", "camera", "mobile_phone"
        ]
        return [objects[r.randint(0, 10)] for _ in range(4)]

    def process(self, msg):
        """Store the camera location, then index the detected object tags."""
        print("Going to process message and store it", msg)
        try:
            self.r.geoadd("CCTV_LOCATION", float(msg["LON"]),
                          float(msg["LAT"]), msg["CCTV_ID"])
            msg["TAGS"] = self.get_objects_in_image(msg.get("IMAGE", ""))
            # Document key combines camera id and event timestamp.
            doc_unique_key = msg["CCTV_ID"] + "_" + msg["TS"]
            self.client.add_document(doc_unique_key,
                                     CCTV_ID=doc_unique_key,
                                     TAGS=",".join(msg["TAGS"]))
        except Exception as error:
            print("Error while adding cctv data", error)
def product_search(query, limit=10, fuzzy_search=True):
    """Search website items via RediSearch, with a database fallback.

    Returns a dict with a `from_redisearch` flag and ranked `results`.
    """
    search_results = {"from_redisearch": True, "results": []}

    if not is_redisearch_enabled():
        # Redisearch module not enabled — fall back to a plain DB query.
        search_results["from_redisearch"] = False
        search_results["results"] = get_product_data(query, 0, limit)
        return search_results

    if not query:
        return search_results

    red = frappe.cache()
    query = clean_up_query(query)

    # TODO: Check perf/correctness with Suggestions & Query vs only Query
    # TODO: Use Levenshtein Distance in Query (max=3)
    ac = AutoCompleter(make_key(WEBSITE_ITEM_NAME_AUTOCOMPLETE), conn=red)
    client = Client(make_key(WEBSITE_ITEM_INDEX), conn=red)
    suggestions = ac.get_suggestions(
        query,
        num=limit,
        fuzzy=fuzzy_search and len(query) > 3  # Fuzzy on length < 3 can be real slow
    )

    # Expand the query with each suggestion as an alternative phrase.
    query_string = query
    for s in suggestions:
        query_string += f"|('{clean_up_query(s.string)}')"

    results = client.search(Query(query_string))
    search_results["results"] = sorted(
        (convert_to_dict(doc) for doc in results.docs),
        key=lambda k: frappe.utils.cint(k["ranking"]),
        reverse=True,
    )

    return search_results
def index():
    """Load the Shakespeare CSV and index one document per play chapter.

    Fixes the original Python-2 constructs: `dict.iteritems()` (removed in
    Python 3) and the `print` statement; also closes the file via `with`.
    """
    client = Client('sh')
    client.create_index(txt=1.0)
    chapters = {}
    with open('will_play_text.csv') as fp:
        r = csv.reader(fp, delimiter=';')
        for line in r:
            # ['62816', 'Merchant of Venice', '9', '3.2.74', 'PORTIA', "I'll begin it,--Ding, dong, bell."]
            play, chapter, character, text = line[1], line[2], line[4], line[5]

            # Accumulate all lines of a chapter into a single document body.
            d = chapters.setdefault('{}:{}'.format(play, chapter), {})
            d['play'] = play
            d['text'] = d.get('text', '') + ' ' + text

    for chapter, doc in chapters.items():
        print(chapter, doc)
        client.add_document(chapter, nosave=True, txt=doc['text'])
class CSVImporter:
    """Import rows from a CSV file into an existing RediSearch index."""

    def __init__(self, args):
        self.host = args.host
        self.port = args.port
        self.index = args.index
        self.file = open(args.file, 'r')
        self.delimiter = args.delimiter
        self.rows = args.rows
        self.hasHeader = args.header
        self.ignore = args.ignore
        self.docid = args.docid
        self.client = Client(self.index, self.host, self.port)
        self.fields = self.client.info()['fields']
        # Running count of imported rows, used for generated document ids.
        self.row_count = 0

    def loafFile(self):
        """Stream the CSV file into the index.

        (Name kept as-is — likely a typo for `loadFile` — so existing
        callers keep working.)
        """
        reader = csv.reader(self.file, delimiter=self.delimiter)
        if self.hasHeader == True:
            next(reader)
        n = 0
        for row in reader:
            # A positive --rows limit stops the import early.
            if self.rows > 0 and n == self.rows:
                break
            self.addRow(row)
            n += 1
        print('Finished loading ' + str(n) + ' rows.')

    def addRow(self, row):
        """Add one CSV row as a document, mapping columns to index fields.

        Columns listed in `ignore` (1-based) and the docid column are
        skipped; remaining values map onto the index fields in order.

        Bug fix: the original computed `doc = 'doc' + str(idx)` AFTER the
        loop, where idx always equals len(row), so every row without an
        explicit docid column was written to the same document id. A
        per-instance row counter now produces unique ids.
        """
        self.row_count += 1
        args = {}
        idx = 0
        fieldnum = 0
        for val in row:
            idx += 1
            if self.ignore is not None and idx in self.ignore or idx == self.docid:
                continue
            args[self.fields[fieldnum][0]] = val
            fieldnum += 1
        doc = 'doc' + str(self.row_count)
        if self.docid > 0:
            doc = row[self.docid - 1]
        self.client.add_document(doc, replace=True, **args)