def index_size(index_name):
    """Return the FT.INFO stats dictionary for *index_name*.

    Connects to the Redis host named 'redis' on port 6379.
    """
    search_client = Client(index_name, 'redis', 6379)
    return search_client.info()
def refresh_search_keys(request):
    """Drop and rebuild the product search index and autocompleter.

    Staff-only: non-staff (or anonymous) users get an implicit None.
    Responds {'success': True} when the old index was dropped (status
    'OK') and the rebuild was kicked off, {'success': False} otherwise.
    """
    user = request.user
    if not (user.is_authenticated() and user.is_staff):
        return None
    search_client = Client('productIndex')
    # Document count before the drop -- kept for debugging parity.
    old_doc_count = search_client.info()['num_docs']
    if search_client.drop_index() != 'OK':
        return JsonResponse({'success': False})
    rebuilt = create_product_search_index()
    completer = AutoCompleter('productAutocompleter')
    count_before_refresh = completer.len()
    create_product_autocompleter()
    count_after_refresh = completer.len()
    return JsonResponse({'success': True})
class CSVImporter:
    """Bulk-loads rows from a CSV file into an existing RediSearch index.

    The index's field definitions are read via FT.INFO and matched
    positionally against the (non-ignored) CSV columns.
    """

    def __init__(self, args):
        self.host = args.host
        self.port = args.port
        self.index = args.index
        self.file = open(args.file, 'r')
        self.delimiter = args.delimiter
        self.rows = args.rows          # max rows to load; <= 0 means "all"
        self.hasHeader = args.header   # skip the first CSV line when True
        self.ignore = args.ignore      # 1-based column indexes to skip, or None
        self.docid = args.docid        # 1-based column holding the doc id; 0 = generate
        self.client = Client(self.index, self.host, self.port)
        # FT.INFO 'fields' entries; entry[0] is the field name.
        self.fields = self.client.info()['fields']

    def loafFile(self):
        """Read the CSV and add each row as a document.

        (Method name kept as-is -- a misspelling of "loadFile" -- to
        preserve the public interface.)
        """
        reader = csv.reader(self.file, delimiter=self.delimiter)
        if self.hasHeader == True:
            next(reader)
        n = 0
        for row in reader:
            if self.rows > 0 and n == self.rows:
                break
            # Pass the row number so each row gets a unique document id.
            self.addRow(row, n)
            n += 1
        print('Finished loading ' + str(n) + ' rows.')

    def addRow(self, row, num=None):
        """Index one CSV row.

        num: 0-based row number used to build a unique document id.

        BUG FIX: the generated id was previously 'doc' + str(idx), where
        idx is the per-row *column* counter -- the same value for every
        row -- so with replace=True each row overwrote the previous
        document. The id is now derived from the row number; if num is
        omitted the old behavior is kept for backward compatibility.
        """
        args = {}
        idx = 0
        fieldnum = 0
        for val in row:
            idx += 1
            if self.ignore is not None and idx in self.ignore or idx == self.docid:
                continue
            args[self.fields[fieldnum][0]] = val
            fieldnum += 1
        doc = 'doc' + str(idx if num is None else num)
        if self.docid > 0:
            # An explicit doc-id column wins over the generated id.
            doc = row[self.docid - 1]
        self.client.add_document(doc, replace=True, **args)
def get(self, request):
    """Build an ad-hoc RediSearch index over all Courses, then search it.

    Query parameter 'key' is the search string. The course id is packed
    into each indexed title after '@' so matching documents can be mapped
    back to Course rows, which are serialized into the response.
    """
    mes = {}
    search_key = request.GET.get('key')
    print(search_key)
    all_classes = Course.objects.all()
    print("开始创建索引——————————————————————————")
    # BUG FIX: the index name was built from datetime.now() twice -- once
    # for indexing and once for searching -- producing two different
    # names, so the search always ran against a nonexistent index.
    # Build the name once and reuse the same client for both phases.
    index_name = 'CII' + str(datetime.now())
    # Create a client for the given index name.
    client = Client(index_name, host=settings.SIP, port='6666')
    # Create the index definition and schema.
    client.create_index((TextField('title'), TextField('body')))
    print('索引创建完毕————————————————————————————————')
    print('开始添加数据————————————————————————————————')
    for i in all_classes:
        print(str(i.id) + str(i.title))
        # Index the document; the course id rides along in the title.
        # NOTE(review): the schema declares 'title' and 'body', but the
        # document supplies 'info' -- that field is not in the schema;
        # confirm whether 'body=i.info' was intended.
        client.add_document('result' + str(datetime.now()),
                            title=i.title + '@' + str(i.id),
                            info=i.info,
                            language='chinese')
        print(333333333)
    print('数据添加完毕————————————————————————————————')
    print(client.info())
    # Search the index that was just populated.
    res = client.search(search_key)
    print('查询结束————————————————————————————————————————————————')
    id_list = []
    print(res.docs)
    for i in res.docs:
        # Title is "<title>@<course id>"; recover the id for the DB query.
        id = i.title.split('@')[1]
        id_list.append(id)
    course = Course.objects.filter(id__in=id_list).all()
    c = CourseSerializersModel(course, many=True)
    mes['course'] = c.data
    mes['code'] = 200
    mes['message'] = '搜索完毕'
    return Response(mes)
)
# NOTE(review): the stray ')' above closes a parser.add_argument(...) call
# that begins before this chunk; parser, open_redis and close_redis are
# defined elsewhere in the file.
parser_results = parser.parse_args()
if parser_results.open:
    # Start the dockerized redis+redisearch server, unless the 'amadeus'
    # container already shows up in `docker ps -a`.
    s = str(subprocess.check_output('docker ps -a', shell=True))
    if s.__contains__('amadeus'):
        print(
            'Already there are running a redis server with the redisearch module installed'
        )
        exit()
    open_redis()
    exit()
client = Client('cveIndex')
try:
    # Probe the server: FT.INFO raises for a missing index, which is the
    # one error tolerated here (the index just hasn't been created yet).
    client.info()
except Exception as e:
    if e.args[0] != 'Unknown Index name':
        # Any other failure means no reachable redisearch server.
        print(
            'You must be running a redis server with the redisearch module installed'
        )
        exit()
if parser_results.close:
    close_redis()
    exit()
if parser_results.a and parser_results.target is None:
    # -a (analysis) is meaningless without a target to analyze.
    parser.error(
        '-a requires -t to specify the target to which apply the analysis')
class CSVImporter:
    # Loads a CSV (first line is the header with the field names) into
    # Redis. Each row becomes a hash keyed by its first column; zip-code
    # geo sets are populated via GEOADD; and, when an index name was
    # supplied, the row is also mirrored into a RediSearch index.

    def __init__(self, args):
        self.host = args.host
        self.port = args.port
        self.search = False
        self.index = args.index
        if self.index is not None:
            # An index name was supplied: also index rows in RediSearch.
            self.search = True
            self.search_client = Client(self.index, self.host, self.port)
            # FT.INFO field definitions; entry[2] is the field type.
            self.info = self.search_client.info()['fields']
        self.file = open(args.file, 'r')
        self.delimiter = args.delimiter
        self.rows = args.rows          # max rows to load; <= 0 means "all"
        self.ignore = args.ignore      # 1-based column indexes to skip, or None
        self.docid = args.docid        # 1-based column holding the doc id; 0 = generate
        self.client = redis.Redis(args.host, args.port)
        self.fields = []               # header names, filled in by loafFile()

    def addRow(self, row, num):
        # Store/index one row. `num` is the 0-based row number, used for
        # the fallback document id.
        values = dict()
        row_id = row[0]                # first column doubles as the hash key
        geo_id = 'zip-'                # geo set key becomes 'zip-<regionidzip>'
        geo_pos = ''
        lat = 0
        lon = 0
        idx = 0
        fieldnum = 0
        for val in row:
            idx += 1
            # Columns are matched to header names positionally (idx is 1-based).
            if self.fields[idx - 1] == 'regionidzip':
                geo_id += val
            if self.fields[idx - 1] == 'latitude':
                # Source data apparently stores microdegrees; convert to
                # degrees -- TODO confirm against the data set.
                lat = float(val) / 1000000
            if self.fields[idx - 1] == 'longitude':
                lon = float(val) / 1000000
            if self.ignore is not None and idx in self.ignore or idx == self.docid:
                continue
            # NUMERIC index fields reject empty strings; coerce to '0'.
            # NOTE(review): self.info is indexed by fieldnum (which skips
            # ignored columns) -- this assumes the index schema was created
            # in the same column order as the CSV; verify against the
            # index definition.
            if self.search == True and self.info[fieldnum][2] == 'NUMERIC' and val == '':
                val = '0'
            values[self.fields[idx - 1]] = val
            fieldnum += 1
        values['geopos'] = str(lon) + ',' + str(lat)
        geo_vals = [lon, lat, row_id]
        self.client.geoadd(geo_id, *geo_vals)
        if self.search == True:
            doc = 'doc-' + str(num)
            if self.docid > 0:
                # An explicit doc-id column wins over the generated id.
                doc = row[self.docid - 1]
            self.search_client.add_document(doc, replace=True, **values)
        else:
            self.client.hmset(row_id, values)

    def loafFile(self):
        # Read the CSV: the first line supplies the field names, every
        # following line becomes a document/hash via addRow().
        # (Name kept as-is -- misspelling of "loadFile" -- public interface.)
        reader = csv.reader(self.file, delimiter=self.delimiter)
        self.fields = next(reader)
        n = 0
        for row in reader:
            if self.rows > 0 and n == self.rows:
                break
            self.addRow(row, n)
            n += 1
        print('Finished loading ' + str(n) + ' rows.')
class CSVImporter:
    """Loads rows from a CSV file into an existing RediSearch index.

    Supports skipping columns, converting date columns to epoch
    milliseconds, and normalizing TAG-field values to the separator
    declared in the index schema (read via FT.INFO).
    """

    def __init__(self, args):
        self.host = args.host
        self.port = args.port
        self.index = args.index
        self.file = open(args.file, 'r')
        self.delimiter = args.delimiter
        self.rows = args.rows          # max rows to load; <= 0 means "all"
        self.hasHeader = args.header   # skip the first CSV line when True
        self.ignore = args.ignore      # 1-based column indexes to skip, or None
        self.docid = args.docid        # 1-based column holding the doc id; 0 = generate
        self.nosave = args.nosave      # index only; don't store the document body
        self.date = args.date          # 1-based column indexes holding dates, or None
        self.format = args.format      # strptime format for date columns
        self.client = Client(self.index, self.host, self.port)
        # FT.INFO field entries: [0]=name, [2]=type, [4]=TAG separator.
        self.fields = self.client.info()['fields']

    def dateToMillis(self, val):
        """Convert a date string to epoch milliseconds (as a string).

        Returns 0 when *val* doesn't match self.format.
        NOTE(review): strftime('%s') is a platform (glibc) extension that
        uses local time; not portable to Windows -- confirm deployment.
        """
        try:
            d = datetime.strptime(val, self.format)
        except ValueError:
            print("Invalid data format: " + val)
            return 0
        return str(int(d.strftime('%s')) * 1000)

    def adjustTagValue(self, val, sep):
        """Rewrite a list-like tag value to use *sep* as separator.

        Commas inside single- or double-quoted runs are preserved, the
        quote characters themselves are dropped, whitespace around the
        separators is stripped, and '[' / ']' are removed.
        """
        i = 0
        insinglequotes = False
        indoublequotes = False
        newVal = ''
        while i < len(val):
            if val[i] == '\'' and not indoublequotes:
                insinglequotes = not insinglequotes
            elif val[i] == '"' and not insinglequotes:
                indoublequotes = not indoublequotes
            else:
                if val[i] == ',' and not insinglequotes and not indoublequotes:
                    newVal += sep
                else:
                    newVal += val[i]
            i += 1
        # BUG FIX: the separator is interpolated into a regex; escape it
        # so schema separators that are regex metacharacters ('|', '.',
        # '+', ...) don't corrupt the pattern (an unescaped '|' turned the
        # pattern into an empty alternation matching everywhere).
        newVal = re.sub(r'[\t ]*' + re.escape(sep) + r'[\t ]*', sep, newVal)
        return re.sub(r'[\[\]]', '', newVal)

    def addRow(self, row, num):
        """Index one CSV row as document 'doc-<num>' (or the docid column)."""
        args = {}
        idx = 0
        fieldnum = 0
        for val in row:
            idx += 1
            if self.ignore is not None and idx in self.ignore or idx == self.docid:
                continue
            if self.date is not None and idx in self.date:
                val = self.dateToMillis(val)
            if self.fields[fieldnum][2] == 'TAG':
                # Normalize tag lists to the separator declared in the schema.
                val = self.adjustTagValue(val, self.fields[fieldnum][4])
            args[self.fields[fieldnum][0]] = val
            fieldnum += 1
        doc = 'doc-' + str(num)
        if self.docid > 0:
            doc = row[self.docid - 1]
        self.client.add_document(doc, replace=True, nosave=self.nosave, **args)

    def loafFile(self):
        """Load up to self.rows rows from the CSV.

        (Name kept as-is -- misspelling of "loadFile" -- public interface.)
        """
        reader = csv.reader(self.file, delimiter=self.delimiter)
        if self.hasHeader == True:
            next(reader)
        n = 0
        for row in reader:
            if self.rows > 0 and n == self.rows:
                break
            self.addRow(row, n)
            n += 1
        print('Finished loading ' + str(n) + ' rows.')
class SearchDemo:
    # Interactive demo: reads raw FT.SEARCH / FT.AGGREGATE commands from
    # stdin, executes them against Redis, and pretty-prints the replies
    # with tabulate. NOTE: Python 2 code (raw_input, `except X, e`).

    def __init__(self, args):
        self.index = args.index
        self.client = Client(self.index, host=args.host, port=args.port)
        self.redis = redis.Redis(args.host, args.port)
        self.fields = []
        # Collect the index's field names from FT.INFO.
        info = self.client.info()['fields']
        for f in info:
            self.fields.append(f[0])

    def printSearchResults(self, total, docs, headers, duration, withid=False):
        # Render FT.SEARCH results as a grid. `headers` lists the document
        # attributes to show; `withid` prepends a document-id column.
        print('Number of results: ' + str(total))
        if total == 0:
            return
        data = []
        for d in docs:
            row = []
            if withid == True:
                row.append(d.id)
            for f in headers:
                val = ''
                try:
                    val = getattr(d, f)
                except AttributeError:
                    # Document lacks this field; show the empty string.
                    pass
                if val is not None:
                    val = val.decode('utf-8')
                row.append(val)
            data.append(row)
        if withid == True:
            headers.insert(0, 'Document ID')
        print(tabulate(data, headers, tablefmt='grid', floatfmt=".2f"))
        print('Execution time : ' + str(duration) + ' ms')

    def getHeadersFromRow(self, row):
        # Reply rows alternate name, value, name, value...; keep the even
        # positions (the names).
        headers = []
        i = 0
        for t in row:
            if i % 2 == 0:
                headers.append(t)
            i += 1
        return headers

    def getValuesFromRow(self, row, ignore):
        # Keep the odd positions (the values), skipping indexes in `ignore`.
        values = []
        i = 0
        for t in row:
            if i % 2 == 1 and i not in ignore:
                if t is not None:
                    t = t.decode('utf-8')
                values.append(t)
            i += 1
        return values

    def getIgnoreList(self, headers):
        # Drop auto-generated aggregation aliases from the display;
        # returns (kept headers, value indexes to skip). `i` tracks the
        # header's position in the raw name/value row (hence step 2).
        ignore = []
        ret = headers[:]
        i = 0
        for h in headers:
            if h.startswith('__generated_alias'):
                ignore.append(i + 1)
                ret.remove(h)
            i += 2
        return ret, ignore

    def printAggregateResult(self, res, duration, ignore_generated=True):
        # Render a raw FT.AGGREGATE reply: res[0] is the result count,
        # the remaining entries are name/value rows.
        num_results = res[0]
        print('Number of results: ' + str(res[0]))
        if num_results == 0:
            return
        rows = res[1:]
        headers = self.getHeadersFromRow(rows[0])
        ignore = []
        if ignore_generated == True:
            headers, ignore = self.getIgnoreList(headers)
        data = []
        for r in rows:
            data.append(self.getValuesFromRow(r, ignore))
        print(tabulate(data, headers, tablefmt='grid', floatfmt=".2f"))
        print('Execution time : ' + str(duration) + ' ms')

    def queryToArgs(self, query):
        # Collapse runs of whitespace, then split like a shell would
        # (quoted phrases stay together) via a space-delimited CSV reader.
        query = " ".join(query.split())
        data = StringIO(query)
        reader = csv.reader(data, delimiter=' ')
        return next(reader)

    def readQuery(self):
        # Prompt the user for a raw query string (Python 2 raw_input).
        query = raw_input('Enter query:')
        return query.strip()

    def executeQuery(self, query):
        # Run the raw command, time it, and dispatch to the right printer.
        res = None
        parts = self.queryToArgs(query)
        try:
            st = datetime.datetime.now()
            res = self.redis.execute_command(*parts)
            duration = (datetime.datetime.now() - st).total_seconds() * 1000
        except redis.exceptions.ResponseError, e:
            print('Error: ' + str(e))
            return
        # NOCONTENT replies carry ids only: res[2] is not a field list.
        nocontent = res[0] == 0 or not isinstance(res[2], (list, ))
        if parts[0].upper().startswith('FT.SEARCH'):
            results = Result(res, not nocontent, duration=duration, has_payload=False)
            if nocontent == True:
                self.printSearchResults(results.total, results.docs, [], duration, True)
            else:
                self.printSearchResults(results.total, results.docs, self.getHeadersFromRow(res[2]), duration, True)
        else:
            self.printAggregateResult(res, duration)
# NOTE(review): this chunk starts mid-loop -- `r` (one XML record chunk),
# `title`, `pmid`, `indexer`, `client` and `count` are defined earlier in
# the file.
if title:
    title = title[0]
else:
    title = ""
# Pull the abstract and flatten AbstractText sections into "Label: text".
abstract = re.findall('<Abstract>([\s\S]*?)</Abstract>', r)
if abstract:
    abstract = re.sub("\n\s*", "", abstract[0])
    abstract = re.sub('<AbstractText Label="(.*?)".*?>', " \\1: ", abstract)
    abstract = re.sub("<\/*Abstract.*?>", "", abstract)
    abstract = re.sub("<Copyright.*?>.*</Copyright.*?>", "", abstract)
    abstract = re.sub("\(PsycINFO Database Record", "", abstract)
else:
    abstract = ""
# type is ignored for now, but for future reference...
# (NOTE(review): `type` shadows the builtin; stored as the str() of the
# whole findall list, not a single value.)
type = re.findall("<PublicationType UI=.*?>(.*?)</PublicationType>", r)
if type:
    type = str(type)
else:
    type = str([])
indexer.add_document(pmid,
                     replace=True,
                     abstract=abstract,
                     title=title,
                     type=type)
# flush any remaining documents
indexer.commit()
docs = client.info()['num_docs']
# NOTE(review): `docs` is concatenated directly into the log message --
# this assumes FT.INFO returns 'num_docs' as a string; if it is an int
# this raises TypeError. Confirm, and wrap in str() if needed.
logging.info(datetime.now().isoformat() + " imported " + str(count) +
             " records from " + sys.argv[1] + "(" + docs + " total)")
class TAS_Redisearch(): #Constructor def __init__(self, table_name, host="localhost", port=6381): try: self.client = Client(table_name, host, port) self.host = host self.port = port self.table_name = table_name self.redis = Redis() self.LIMIT = 10 except Exception as e: print 'yyy' print >> sys.stderr, "TAS_Redisearch Error inside Constructor Index:\'", table_name, "\' HOST:\'", host, "\' PORT:\'", port, "\'\n" print >> sys.stderr, e #Will set the no of results to show def set_result_limit(self, num): self.LIMIT = num return #Defines the schema for Redisearch def set_schema(self, schema): try: return self.client.create_index( schema, False, False, [] ) #last empty list will ensure that default stopwords will not be ignored except Exception as e: print >> sys.stderr, "TAS_Redisearch Error inside set_schema Index:\'", self.table_name, "\' HOST:\'", self.host, "\' PORT:\'", self.port, "\'\n" print >> sys.stderr, e #Deletes index(table) def drop_index(self): try: return self.client.drop_index() except Exception as e: print >> sys.stderr, "TAS_Redisearch Error inside drop_index Index:\'", self.table_name, "\' HOST:\'", self.host, "\' PORT:\'", self.port, "\'\n" print >> sys.stderr, e #Deletes a document(row) by document_index def delete_document(self, document_index): try: return self.client.delete_document(document_index) except Exception as e: print >> sys.stderr, "TAS_Redisearch Error inside delete_document Index:\'", self.table_name, "\' HOST:\'", self.host, "\' PORT:\'", self.port, "\'\n" print >> sys.stderr, e #############################################SEARCHES BELOW####################################### #Uses python libraries def py_search(self, query, result_limit=-1): if result_limit == -1: result_limit = self.LIMIT try: return self.client.search(Query(query).paging(0, result_limit)) except Exception as e: print >> sys.stderr, "TAS_Redisearch Error inside py_search Index:\'", self.table_name, "\' HOST:\'", self.host, "\' PORT:\'", self.port, "\'\n" print 
sys.stderr, e #Search with default parameters [will return dictionary] def generic_search(self, search_text, result_limit=-1): if result_limit == -1: result_limit = self.LIMIT query_string = "FT.SEARCH " + self.table_name + " " + search_text + " LIMIT 0 " + str( result_limit) try: res = self.redis.execute_command(query_string) return Result(res, True) except Exception as e: print >> sys.stderr, "TAS_Redisearch Error inside generic_search Index:\'", self.table_name, "\' HOST:\'", self.host, "\' PORT:\'", self.port, "\'\n" print >> sys.stderr, e def free_exact_search(self, key, result_limit=-1): org_key = key l = [] try: if result_limit == -1: result_limit = self.LIMIT key = self.clean_string(key) returned = self.py_search("*", result_limit) for result in returned.docs: result_dict = vars(result) if org_key in result_dict.values(): l.append(result_dict) except Exception as e: print >> sys.stderr, "TAS_Redisearch Error inside value_search Index:\'", self.table_name, "\' HOST:\'", self.host, "\' PORT:\'", self.port, "\'\n" print >> sys.stderr, e return l #{fieldname:[value1, value2], fieldname:[value1, value2]} def exact_search(self, input_dict, result_limit=-1): formed_str = "" l = [] for field, value_list in input_dict.items(): formed_str += "@" + field + ":(" for key in value_list: key = self.clean_string(key) formed_str += "(\'" + key + "\') | " formed_str = formed_str.rstrip(' |') formed_str += ") " print "PASSED: ", formed_str returned = self.py_search(formed_str, result_limit) print "RETURNED:", returned for result in returned.docs: result_dict = vars(result) for itr, ktr in input_dict.items(): if result_dict[itr] in ktr: l.append(result_dict) return l #Search with the passed query def custom_search(self, query_string): try: res = self.redis.execute_command(query_string) return Result(res, True) except Exception as e: print >> sys.stderr, "TAS_Redisearch Error inside custom_search Index:\'", self.table_name, "\' HOST:\'", self.host, "\' PORT:\'", self.port, 
"\'\n" print >> sys.stderr, e #Search in 'search_in_field' [if any of the element in 'list_to_union' is found then include it in the result def union_search(self, list_to_union, search_in_field): query_string = "FT.SEARCH " + self.table_name + " " union_text = "@" + search_in_field + ":(" for text in list_to_union: union_text += text + "|" union_text = union_text.rstrip("|") union_text += ")" query_string += union_text try: res = self.redis.execute_command(query_string) return Result(res, True) except Exception as e: print >> sys.stderr, "TAS_Redisearch Error inside union_search Index:\'", self.table_name, "\' HOST:\'", self.host, "\' PORT:\'", self.port, "\'\n" print >> sys.stderr, e #will return all the dictionary for all the categories if no arguments are passed def category_taxonomy_dict(self, category='*'): try: cat_taxo_dict = {} total_docs = self.client.info()['num_docs'] query_string = "" if category == '*': query_string = category else: query_string = "@CATEGORY:" + category result = self.py_search(query_string, total_docs) for single_result in result.docs: try: category = single_result.CATEGORY taxoname = single_result.TAXONAME except Exception as ex: pass if not category in cat_taxo_dict: cat_taxo_dict[category] = [] elif taxoname not in cat_taxo_dict[category]: cat_taxo_dict[category].append(taxoname) except Exception as e: sys.stderr, "TAS_Redisearch Error inside category_taxonomy_dict Index:\'", self.table_name, "\' HOST:\'", self.host, "\' PORT:\'", self.port, "\'\n" print >> sys.stderr, e return cat_taxo_dict def total_record(self): try: return int(self.client.info()['num_docs']) except Exception as e: sys.stderr, "TAS_Redisearch Error inside total_records Index:\'", self.table_name, "\' HOST:\'", self.host, "\' PORT:\'", self.port, "\'\n" print >> sys.stderr, e def get_all_records(self): try: total = str(self.total_record()) res = self.redis.execute_command("FT.SEARCH " + self.table_name + " * LIMIT 0 " + total) return Result(res, True) except 
Exception as e: sys.stderr, "TAS_Redisearch Error inside total_records Index:\'", self.table_name, "\' HOST:\'", self.host, "\' PORT:\'", self.port, "\'\n" print >> sys.stderr, e def clean_string(self, key): key = key.replace(',', ' ') key = key.replace('.', ' ') key = key.replace('<', ' ') key = key.replace('>', ' ') key = key.replace('{', ' ') key = key.replace('}', ' ') key = key.replace('[', ' ') key = key.replace(']', ' ') key = key.replace('"', ' ') key = key.replace('\'', ' ') key = key.replace(':', ' ') key = key.replace(';', ' ') key = key.replace('!', ' ') key = key.replace('@', ' ') key = key.replace('#', ' ') key = key.replace('$', ' ') key = key.replace('%', ' ') key = key.replace('^', ' ') key = key.replace('&', ' ') key = key.replace('*', ' ') key = key.replace('(', ' ') key = key.replace(')', ' ') key = key.replace('-', ' ') key = key.replace('+', ' ') key = key.replace('=', ' ') key = key.replace('~', ' ') return key
"""Load the wocka joke corpus into a RediSearch index named 'jokes'."""
import json

from redisearch import Client, TagField, TextField

# Read the whole corpus up front.
with open('wocka.json', 'r') as joke_file:
    jokes = json.load(joke_file)

hostname = 'redis-17235.laurent.cs.redislabs.com'
port = 17235

client = Client('jokes', hostname, port)
schema = (
    TextField('title'),
    TextField('body'),
    TextField('category'),
    TagField('label'),
)
client.create_index(schema)

# One document per joke; the category doubles as the tag label.
for entry in jokes:
    client.add_document(
        entry['id'],
        title=entry['title'],
        body=entry['body'],
        category=entry['category'],
        label=entry['category'],
    )

print("number of jokes in the json file: " + str(len(jokes)))
info = client.info()
print(info)