def _generate_block_indexes_from_scratch() -> None:
    """Create the block index and index every block currently in storage.

    Unlike _generate_block_indexes, this assumes the index does not already
    exist and does not track previously indexed blocks.
    """
    client = _get_redisearch_index_client(Indexes.block.value)
    client.create_index(
        [
            redisearch.NumericField("block_id", sortable=True),
            redisearch.NumericField("prev_id", sortable=True),
            redisearch.NumericField("timestamp", sortable=True),
        ]
    )
    _log.info("Listing all blocks in storage")
    block_paths = storage.list_objects("BLOCK/")
    # Only plain numeric block objects (e.g. "BLOCK/123"), not derived objects
    pattern = re.compile(r"BLOCK\/[0-9]+$")
    for block_path in block_paths:
        if pattern.search(block_path):
            _log.info(f"Adding index for {block_path}")
            raw_block = storage.get_json_from_object(block_path)
            block = cast("model.BlockModel", None)
            if LEVEL == "1":
                block = l1_block_model.new_from_stripped_block(raw_block)
            elif LEVEL == "2":
                block = l2_block_model.new_from_at_rest(raw_block)
            elif LEVEL == "3":
                block = l3_block_model.new_from_at_rest(raw_block)
            elif LEVEL == "4":
                block = l4_block_model.new_from_at_rest(raw_block)
            elif LEVEL == "5":
                block = l5_block_model.new_from_at_rest(raw_block)
            else:
                # Bug fix: an unknown LEVEL previously fell through and crashed
                # below with AttributeError on None; fail with a clear error.
                raise RuntimeError(f"Unsupported node level for block indexing: {LEVEL}")
            put_document(Indexes.block.value, block.block_id, block.export_as_search_index())
def _generate_l5_verification_indexes() -> None:
    """Rebuild the L5 verification index and index all stored L5 blocks.

    Only indexes when this is a level "1" chain with broadcast enabled; uses
    L5_BLOCK_MIGRATION_KEY (a redis set of block paths) to skip blocks that a
    previous — possibly interrupted — run already indexed.
    """
    client = _get_redisearch_index_client(Indexes.verification.value)
    try:
        # Bug fix: drop_index (FT.DROP) raises a ResponseError when the index
        # does not exist yet; the call was previously unguarded, so the very
        # first run of this migration crashed before creating the index.
        client.drop_index()
    except redis.exceptions.ResponseError:
        pass  # No existing index to drop
    try:
        client.create_index(
            [
                redisearch.NumericField("block_id", sortable=True),
                redisearch.NumericField("prev_id", sortable=True),
                redisearch.NumericField("timestamp", sortable=True),
                redisearch.TagField("dc_id"),
            ]
        )
    except redis.exceptions.ResponseError as e:
        if not str(e).startswith("Index already exists"):  # We don't care if index already exists
            raise
    _log.info("Listing all blocks in storage")
    block_paths = storage.list_objects("BLOCK/")
    # Matches L5 block objects such as "BLOCK/123-l5-..." (case-insensitive L)
    pattern = re.compile(r"BLOCK\/([0-9]+)-([Ll])5(.*)$")
    for block_path in block_paths:
        if LEVEL == "1" and BROADCAST_ENABLED and pattern.search(block_path):
            if not client.redis.sismember(L5_BLOCK_MIGRATION_KEY, block_path):
                raw_block = storage.get_json_from_object(block_path)
                block = l5_block_model.new_from_at_rest(raw_block)
                put_document(Indexes.verification.value, block_path.split("/")[1], block.export_as_search_index())
                client.redis.sadd(L5_NODES, block.dc_id)
                client.redis.sadd(L5_BLOCK_MIGRATION_KEY, block_path)
            else:
                _log.info(f"Skipping already indexed L5 block {block_path}")
def _get_custom_field_from_input(custom_index_input: "custom_index") -> redisearch.client.Field:
    """Translate a custom index definition into the matching redisearch Field.

    Supports "text", "tag", and "number" index types; raises RuntimeError for
    anything else. Missing options fall back to redisearch defaults.
    """
    input_type = custom_index_input["type"]
    field_name = custom_index_input["field_name"]
    # Normalize absent/None options to an empty dict so lookups below are uniform
    options = custom_index_input.get("options") or {}
    if input_type == "text":
        weight = 1.0
        cust_weight = options.get("weight")
        # Only accept a numeric weight within [0, 1]; otherwise keep the default
        if isinstance(cust_weight, (int, float)) and 0 <= cust_weight <= 1:
            weight = float(cust_weight)
        return redisearch.TextField(
            field_name,
            weight=weight,
            sortable=bool(options.get("sortable")),
            no_stem=bool(options.get("no_stem")),
            no_index=bool(options.get("no_index")),
        )
    if input_type == "tag":
        return redisearch.TagField(
            field_name,
            separator=options.get("separator") or ",",
            no_index=bool(options.get("no_index")),
        )
    if input_type == "number":
        return redisearch.NumericField(
            field_name,
            sortable=bool(options.get("sortable")),
            no_index=bool(options.get("no_index")),
        )
    raise RuntimeError(f"Index type {input_type} is not supported")
def __init__(self):
    """Set up redis clients and ensure the product search index exists."""
    self.r = redis.Redis(host=redis_host, port=redis_port)
    self.rs = redisearch.Client('product_name', host=redis_host, port=redis_port)
    try:
        self.rs.create_index((
            redisearch.NumericField('id'),
            redisearch.TextField('name'),
            redisearch.TextField('description'),
            redisearch.TextField('vendor'),
            redisearch.NumericField('price'),
            redisearch.TextField('currency'),
            redisearch.TextField('category'),
            redisearch.TextField('images'),
        ))
    except Exception as e:
        # Bug fix: the original printed a placeholder f-string with no
        # placeholders, hiding the actual failure (commonly a harmless
        # "Index already exists" on restart). Report the real error.
        print(f'error creating index: {e}')
        print(f'index info: {self.rs.info()}')
def force_create_transaction_index(index: str, custom_indexes: Optional[Iterable["custom_index"]] = None) -> None:
    """Create (and overwrite if necessary) index for a transaction type with optional custom_indexes"""
    # Remove any existing index of this name before recreating it
    delete_index(index)
    client = _get_redisearch_index_client(index)
    # Every transaction index gets the standard tag/timestamp/block_id fields
    fields = [
        redisearch.TextField("tag"),
        redisearch.NumericField("timestamp", sortable=True),
        redisearch.NumericField("block_id", sortable=True),
    ]
    # Followed by any caller-supplied custom fields
    fields.extend(_get_custom_field_from_input(ci) for ci in custom_indexes or [])
    client.create_index(fields)
def _generate_block_indexes() -> None:
    """Ensure the block index exists and index any blocks not yet indexed.

    Uses BLOCK_MIGRATION_KEY (a redis set of block paths) to make this
    migration safely resumable after an interruption.
    """
    client = _get_redisearch_index_client(Indexes.block.value)
    try:
        client.create_index(
            [
                redisearch.NumericField("block_id", sortable=True),
                redisearch.NumericField("prev_id", sortable=True),
                redisearch.NumericField("timestamp", sortable=True),
            ]
        )
    except redis.exceptions.ResponseError as e:
        if not str(e).startswith("Index already exists"):  # We don't care if index already exists
            raise
    _log.info("Listing all blocks in storage")
    block_paths = storage.list_objects("BLOCK/")
    # Only plain numeric block objects (e.g. "BLOCK/123"), not derived objects
    pattern = re.compile(r"BLOCK\/[0-9]+$")
    for block_path in block_paths:
        if pattern.search(block_path):
            # do a check to see if this block was already marked as indexed
            if not client.redis.sismember(BLOCK_MIGRATION_KEY, block_path):
                _log.info(f"Adding index for {block_path}")
                raw_block = storage.get_json_from_object(block_path)
                block = cast("model.BlockModel", None)
                if LEVEL == "1":
                    block = l1_block_model.new_from_stripped_block(raw_block)
                elif LEVEL == "2":
                    block = l2_block_model.new_from_at_rest(raw_block)
                elif LEVEL == "3":
                    block = l3_block_model.new_from_at_rest(raw_block)
                elif LEVEL == "4":
                    block = l4_block_model.new_from_at_rest(raw_block)
                elif LEVEL == "5":
                    block = l5_block_model.new_from_at_rest(raw_block)
                else:
                    # Bug fix: an unknown LEVEL previously fell through and
                    # crashed with AttributeError on None; fail clearly instead.
                    raise RuntimeError(f"Unsupported node level for block indexing: {LEVEL}")
                put_document(Indexes.block.value, block.block_id, block.export_as_search_index())
                client.redis.sadd(BLOCK_MIGRATION_KEY, block_path)
            else:
                _log.info(f"Skipping already indexed block {block_path}")
def savetoredis(req_id, colnames, datavalues, expired_time):
    """Index a result table in RediSearch and schedule its cleanup.

    Stores the column list in a redis hash, creates a per-request index whose
    field types are inferred from column names, adds one document per row,
    and schedules an async drop_index after `expired_time` seconds.
    """
    db.hmset("%s:cols" % req_id, {'cols': colnames})
    client = redisearch.Client(req_id)
    # Columns whose names contain any of these markers hold numeric metrics;
    # everything else is indexed as sortable text.
    numeric_markers = ("score", "diff", "row", "z_score", "p_value")
    indexes = [
        redisearch.NumericField(col, sortable=True)
        if any(marker in col for marker in numeric_markers)
        else redisearch.TextField(col, sortable=True)
        for col in colnames
    ]
    client.create_index(indexes)
    for i, row in enumerate(datavalues):
        fields = {col: row[col] for col in colnames}
        client.add_document("%s_%d" % (req_id, i), **fields)
    # Expiry of the cols hash is deliberately disabled for now
    # (was: db.expire("%s:cols" % req_id, expired_time));
    # the async drop_index task below handles cleanup instead.
    drop_index.apply_async((req_id,), countdown=expired_time)
class RediSearchSimstring(BaseSimstring):
    """RediSearch implementation of Simstring algorithm.

    Args:
        db (Dict[str, Any]): Options passed directly to 'RediSearchDatabase()'.

    Kwargs:
        Options forwarded to 'BaseSimstring()'.
    """

    NAME = 'redisearch-simstring'

    # Index schema: the original term, one n-gram feature per document, and
    # the term's feature count ('sz') used to filter candidates by size.
    _FIELDS = (
        redisearch.TextField('term', no_stem=True),
        redisearch.TextField('ng', no_stem=False),
        redisearch.NumericField('sz', sortable=True),
    )

    def __init__(
        self,
        *,
        # NOTE: Hijack 'db' parameter from 'BaseMatcher'
        db: Dict[str, Any] = None,  # NOTE(review): effectively Optional[Dict[str, Any]]
        **kwargs,
    ):
        super().__init__(**kwargs)
        if db is None:
            db = {}
        # 'index' is consumed here; all remaining options pass through to the DB
        self._db = RediSearchDatabase(
            index=db.pop('index', 'facet'),
            fields=type(self)._FIELDS,
            **db,
        )
        # NOTE: Use document count as document IDs
        self._doc_id = len(self._db)

    def get_strings(self, size: int, feature: str) -> List[str]:
        """Get strings corresponding to feature size and query feature."""
        # Verbatim (non-stemmed) search restricted to the 'ng' field, filtered
        # to documents whose feature count equals 'size'; only the stored
        # 'term' field is returned.
        query = (
            redisearch.Query(feature).verbatim().limit_fields('ng').add_filter(
                redisearch.NumericFilter('sz', size, size)).return_fields('term'))
        return [document.term for document in self._db.get(query).docs]

    def insert(self, string: str):
        """Insert string into database."""
        features = self._ngram.get_features(string)
        # NOTE: RediSearch does not support storing lists in a field,
        # so we create a document for each feature. Downside is the high
        # redundancy of data and extra storage.
        for i, feature in enumerate(features):
            self._db.set(
                str(self._doc_id + i),
                {
                    'term': string,
                    'sz': len(features),
                    'ng': feature,
                },
            )
        self._doc_id += len(features)
def main():
    """Interactive search REPL over a small, hard-coded record collection."""
    print("hello!")
    r = redis.Redis(host=redis_host, port=redis_port)
    rs = redisearch.Client('recordIndex', redis_host, redis_port)

    # flush to get a fresh db
    # TODO - remove when dockerized
    r.flushall()

    record_collection = [{
        'title': 'Brothers and Sisters',
        'artist': 'Allman Brothers',
        'year': 1973,
        'genre': ['rock', 'southern rock', 'blues rock']
    }, {
        'title': 'Aja',
        'artist': 'Steely Dan',
        'year': 1977,
        'genre': ['rock', 'pop']
    }, {
        'title': 'Can\'t Buy a Thrill',
        'artist': 'Steely Dan',
        'year': 1972,
        'genre': ['rock', 'pop']
    }, {
        'title': 'Deguello',
        'artist': 'ZZ Top',
        'year': 1979,
        'genre': ['rock']
    }, {
        'title': 'American Beauty',
        'artist': 'Grateful Dead',
        'year': 1970,
        'genre': ['rock', 'psychedelic rock']
    }, {
        'title': 'Second Helping',
        'artist': 'Lynard Skynard',
        'year': 1974,
        'genre': ['rock', 'southern rock']
    }, {
        'title': 'The Joker',
        'artist': 'Steve Biller Band',
        'year': 1973,
        'genre': ['rock', 'blues rock']
    }, {
        'title': 'Book of Dreams',
        'artist': 'Steve Biller Band',
        'year': 1977,
        'genre': ['rock']
    }, {
        'title': 'Rumours',
        'artist': 'Fleetwood Mac',
        'year': 1977,
        'genre': ['rock', 'pop']
    }, {
        'title': 'Where We All Belong',
        'artist': 'Marshall Tucker Band',
        'year': 1974,
        'genre': ['rock', 'southern rock']
    }]

    try:
        rs.create_index((redisearch.TextField('title', sortable=True),
                         redisearch.TextField('artist', sortable=True),
                         redisearch.NumericField('year', sortable=True),
                         redisearch.TagField('genre', separator=',')))
    except Exception:
        print(f'Error creating index: {sys.exc_info()}')
        print(f'index info: {rs.info()}')

    run = True
    load_data(rs, record_collection)
    while run:
        txt = input("enter a search term: ")
        if txt == "quit":
            run = False
            break
        txt_arr = txt.split(' ', 1)
        if len(txt_arr) < 2:
            # Bug fix: a bare field name (e.g. just "title") used to raise
            # IndexError on txt_arr[1]; treat it as an invalid query instead.
            print("invalid query")
            continue
        print(f'searching {txt_arr}')
        field, term = txt_arr
        if field == 'title':
            res = rs.search(f'@title:{term}')
            print(res)
        elif field == 'artist':
            res = rs.search(f'@artist:{term}')
            print(res)
        elif field == 'year':
            # "year A" searches [A A]; "year A B" searches the range [A B]
            full_txt_arr = txt.split(' ')
            former = full_txt_arr[1]
            latter = full_txt_arr[1]
            if len(full_txt_arr) == 3:
                latter = full_txt_arr[2]
            res = rs.search(f'@year:[{former} {latter}]')
            print(res)
        elif field == 'genre':
            # Bug fix: this branch was an empty 'pass'. genre is indexed as a
            # TagField, so use RediSearch tag syntax: @genre:{value}
            res = rs.search(f'@genre:{{{term}}}')
            print(res)
        else:
            print("invalid query")