예제 #1
0
def _generate_block_indexes_from_scratch() -> None:
    """Create the block search index and index every block object in storage.

    The index is created with sortable numeric ``block_id``, ``prev_id`` and
    ``timestamp`` fields. Every object listed under ``BLOCK/`` whose path
    matches ``BLOCK/<digits>`` is then loaded, parsed with the block model
    selected by ``LEVEL`` and written to the block index.
    """
    index_client = _get_redisearch_index_client(Indexes.block.value)
    schema = [
        redisearch.NumericField("block_id", sortable=True),
        redisearch.NumericField("prev_id", sortable=True),
        redisearch.NumericField("timestamp", sortable=True),
    ]
    index_client.create_index(schema)

    _log.info("Listing all blocks in storage")
    all_paths = storage.list_objects("BLOCK/")
    block_path_regex = re.compile(r"BLOCK\/[0-9]+$")
    for path in all_paths:
        # Ignore anything under BLOCK/ that is not a plain numeric block id
        if not block_path_regex.search(path):
            continue
        _log.info(f"Adding index for {path}")
        raw = storage.get_json_from_object(path)
        # Placeholder value; replaced below when LEVEL is one of "1".."5"
        parsed = cast("model.BlockModel", None)
        if LEVEL == "1":
            parsed = l1_block_model.new_from_stripped_block(raw)
        elif LEVEL == "2":
            parsed = l2_block_model.new_from_at_rest(raw)
        elif LEVEL == "3":
            parsed = l3_block_model.new_from_at_rest(raw)
        elif LEVEL == "4":
            parsed = l4_block_model.new_from_at_rest(raw)
        elif LEVEL == "5":
            parsed = l5_block_model.new_from_at_rest(raw)
        put_document(Indexes.block.value, parsed.block_id, parsed.export_as_search_index())
예제 #2
0
def _generate_l5_verification_indexes() -> None:
    """Rebuild the verification search index from L5 block objects in storage.

    Drops the verification index, recreates it (an "Index already exists"
    response is ignored) and walks every object under ``BLOCK/``. Paths of the
    form ``BLOCK/<digits>-l5...`` are only processed when ``LEVEL`` is ``"1"``
    and ``BROADCAST_ENABLED`` is truthy. Each processed block is indexed under
    the path component after ``BLOCK/``, its ``dc_id`` is added to the
    ``L5_NODES`` set and its path to the ``L5_BLOCK_MIGRATION_KEY`` set; paths
    already in that set are skipped.

    Raises:
        redis.exceptions.ResponseError: If index creation fails for a reason
            other than the index already existing.
    """
    index_client = _get_redisearch_index_client(Indexes.verification.value)
    index_client.drop_index()
    try:
        index_client.create_index(
            [
                redisearch.NumericField("block_id", sortable=True),
                redisearch.NumericField("prev_id", sortable=True),
                redisearch.NumericField("timestamp", sortable=True),
                redisearch.TagField("dc_id"),
            ]
        )
    except redis.exceptions.ResponseError as err:
        # We don't care if index already exists
        already_exists = str(err).startswith("Index already exists")
        if not already_exists:
            raise

    _log.info("Listing all blocks in storage")
    all_paths = storage.list_objects("BLOCK/")
    l5_path_regex = re.compile(r"BLOCK\/([0-9]+)-([Ll])5(.*)$")
    for path in all_paths:
        if not (LEVEL == "1" and BROADCAST_ENABLED and l5_path_regex.search(path)):
            continue
        if index_client.redis.sismember(L5_BLOCK_MIGRATION_KEY, path):
            _log.info(f"Skipping already indexed L5 block {path}")
            continue
        raw = storage.get_json_from_object(path)
        l5_block = l5_block_model.new_from_at_rest(raw)
        # The document id is the object name that follows "BLOCK/"
        put_document(Indexes.verification.value, path.split("/")[1], l5_block.export_as_search_index())
        index_client.redis.sadd(L5_NODES, l5_block.dc_id)
        index_client.redis.sadd(L5_BLOCK_MIGRATION_KEY, path)
예제 #3
0
def _get_custom_field_from_input(custom_index_input: "custom_index") -> redisearch.client.Field:
    input_type = custom_index_input["type"]
    field_name = custom_index_input["field_name"]
    options = custom_index_input.get("options")
    if input_type == "text":
        weight = 1.0
        sortable = False
        no_stem = False
        no_index = False
        if options:
            sortable = bool(options.get("sortable"))
            no_stem = bool(options.get("no_stem"))
            no_index = bool(options.get("no_index"))
            cust_weight = options.get("weight")
            if isinstance(cust_weight, (int, float)) and cust_weight >= 0 and cust_weight <= 1:
                weight = float(cust_weight)
        return redisearch.TextField(field_name, weight=weight, sortable=sortable, no_stem=no_stem, no_index=no_index)
    elif input_type == "tag":
        separator = ","
        no_index = False
        if options:
            separator = options.get("separator") or ","
            no_index = bool(options.get("no_index"))
        return redisearch.TagField(field_name, separator=separator, no_index=no_index)
    elif input_type == "number":
        sortable = False
        no_index = False
        if options:
            sortable = bool(options.get("sortable"))
            no_index = bool(options.get("no_index"))
        return redisearch.NumericField(field_name, sortable=sortable, no_index=no_index)
    else:
        raise RuntimeError(f"Index type {input_type} is not supported")
예제 #4
0
 def __init__(self):
     """Connect to Redis and try to create the ``product_name`` search index.

     Both clients use the ``redis_host`` / ``redis_port`` values from the
     enclosing scope. Index creation is best effort: a failure is reported
     on stdout together with its cause and construction continues, after
     which the current index info is printed.
     """
     # setup redis clients
     self.r = redis.Redis(host=redis_host, port=redis_port)
     self.rs = redisearch.Client('product_name',
                                 host=redis_host,
                                 port=redis_port)
     try:
         self.rs.create_index(
             (redisearch.NumericField('id'), redisearch.TextField('name'),
              redisearch.TextField('description'),
              redisearch.TextField('vendor'),
              redisearch.NumericField('price'),
              redisearch.TextField('currency'),
              redisearch.TextField('category'),
              redisearch.TextField('images')))
     except Exception as err:
         # Kept as best effort, but include the cause so a real failure
         # (e.g. connection error) is not hidden behind a generic message.
         print(f'error creating index: {err}')
     print(f'index info: {self.rs.info()}')
예제 #5
0
def force_create_transaction_index(
        index: str,
        custom_indexes: Optional[Iterable["custom_index"]] = None) -> None:
    """Create (and overwrite if necessary) the search index for a transaction type.

    Args:
        index: Name of the index to (re)create.
        custom_indexes: Optional custom index definitions; each one is turned
            into a field and added after the standard transaction fields.
    """
    # Remove any index already registered under this name
    delete_index(index)
    index_client = _get_redisearch_index_client(index)
    # Fields present on every transaction index
    schema = [
        redisearch.TextField("tag"),
        redisearch.NumericField("timestamp", sortable=True),
        redisearch.NumericField("block_id", sortable=True),
    ]
    # Caller-supplied fields, if any, follow the standard ones
    schema.extend(_get_custom_field_from_input(spec) for spec in custom_indexes or ())
    index_client.create_index(schema)
예제 #6
0
def _generate_block_indexes() -> None:
    """Ensure the block search index exists and index blocks not yet migrated.

    Creates the block index (an "Index already exists" response is ignored),
    then walks every object under ``BLOCK/`` whose path matches
    ``BLOCK/<digits>``. Paths already present in the ``BLOCK_MIGRATION_KEY``
    set are skipped; the others are parsed with the block model selected by
    ``LEVEL``, written to the index and then added to that set.

    Raises:
        redis.exceptions.ResponseError: If index creation fails for a reason
            other than the index already existing.
    """
    index_client = _get_redisearch_index_client(Indexes.block.value)
    try:
        index_client.create_index([
            redisearch.NumericField("block_id", sortable=True),
            redisearch.NumericField("prev_id", sortable=True),
            redisearch.NumericField("timestamp", sortable=True),
        ])
    except redis.exceptions.ResponseError as err:
        # We don't care if index already exists
        already_exists = str(err).startswith("Index already exists")
        if not already_exists:
            raise

    _log.info("Listing all blocks in storage")
    all_paths = storage.list_objects("BLOCK/")
    block_path_regex = re.compile(r"BLOCK\/[0-9]+$")
    for path in all_paths:
        if not block_path_regex.search(path):
            continue
        # Paths recorded in the migration set were indexed on an earlier run
        if index_client.redis.sismember(BLOCK_MIGRATION_KEY, path):
            _log.info(f"Skipping already indexed block {path}")
            continue
        _log.info(f"Adding index for {path}")
        raw = storage.get_json_from_object(path)
        # Placeholder value; replaced below when LEVEL is one of "1".."5"
        parsed = cast("model.BlockModel", None)
        if LEVEL == "1":
            parsed = l1_block_model.new_from_stripped_block(raw)
        elif LEVEL == "2":
            parsed = l2_block_model.new_from_at_rest(raw)
        elif LEVEL == "3":
            parsed = l3_block_model.new_from_at_rest(raw)
        elif LEVEL == "4":
            parsed = l4_block_model.new_from_at_rest(raw)
        elif LEVEL == "5":
            parsed = l5_block_model.new_from_at_rest(raw)
        put_document(Indexes.block.value, parsed.block_id, parsed.export_as_search_index())
        index_client.redis.sadd(BLOCK_MIGRATION_KEY, path)
예제 #7
0
def savetoredis(req_id, colnames, datavalues, expired_time):
    """Store a result table in Redis as a search index and schedule its removal.

    Args:
        req_id: Request identifier; used as the index name and as the prefix
            of every document id (``<req_id>_<row number>``).
        colnames: Column names. Columns whose name contains ``score``,
            ``diff``, ``row``, ``z_score`` or ``p_value`` become sortable
            numeric fields; all others become sortable text fields.
        datavalues: Rows, addressed as ``datavalues[i][column_name]`` for
            ``i`` in ``range(len(datavalues))``.
        expired_time: Passed as ``countdown`` when scheduling ``drop_index``.
    """
    db.hmset("%s:cols" % req_id, {'cols': colnames})
    search_client = redisearch.Client(req_id)

    numeric_markers = ("score", "diff", "row", "z_score", "p_value")
    schema = [
        redisearch.NumericField(name, sortable=True)
        if any(marker in name for marker in numeric_markers)
        else redisearch.TextField(name, sortable=True)
        for name in colnames
    ]
    search_client.create_index(schema)

    for row_idx in range(len(datavalues)):
        doc_fields = {name: datavalues[row_idx][name] for name in colnames}
        search_client.add_document("%s_%d" % (req_id, row_idx), **doc_fields)

    # No expiry is set on the "<req_id>:cols" hash for now; only the index
    # drop is scheduled, with expired_time as the countdown.
    drop_index.apply_async((req_id, ), countdown=expired_time)
예제 #8
0
파일: redisearch.py 프로젝트: edponce/FACET
class RediSearchSimstring(BaseSimstring):
    """RediSearch implementation of Simstring algorithm.

    Each inserted string is stored as one document per n-gram feature, with
    the fields 'term' (the string), 'ng' (the feature) and 'sz' (the number
    of features of the string).

    Args:
        db (Dict[str, Any]): Options passed directly to
            'RediSearchDatabase()'. The optional 'index' entry selects the
            index name (default 'facet'). The mapping is copied, so the
            caller's object is never modified.

    Kwargs: Options forwarded to 'BaseSimstring()'.
    """

    NAME = 'redisearch-simstring'

    _FIELDS = (
        redisearch.TextField('term', no_stem=True),
        redisearch.TextField('ng', no_stem=False),
        redisearch.NumericField('sz', sortable=True),
    )

    def __init__(
        self,
        *,
        # NOTE: Hijack 'db' parameter from 'BaseMatcher'
        db: Dict[str, Any] = None,
        **kwargs,
    ):
        super().__init__(**kwargs)

        # Work on a copy: popping 'index' below must not remove the key
        # from a dict the caller may reuse (e.g. a shared configuration).
        db = {} if db is None else dict(db)

        self._db = RediSearchDatabase(
            index=db.pop('index', 'facet'),
            fields=type(self)._FIELDS,
            **db,
        )
        # NOTE: Use document count as document IDs
        self._doc_id = len(self._db)

    def get_strings(self, size: int, feature: str) -> List[str]:
        """Get strings corresponding to feature size and query feature.

        Args:
            size (int): Exact value the 'sz' field must have.
            feature (str): Feature searched verbatim in the 'ng' field.

        Returns:
            List[str]: The 'term' value of every matching document.
        """
        query = (
            redisearch.Query(feature).verbatim().limit_fields('ng').add_filter(
                redisearch.NumericFilter('sz', size,
                                         size)).return_fields('term'))
        return [document.term for document in self._db.get(query).docs]

    def insert(self, string: str):
        """Insert string into database.

        Args:
            string (str): String to store; one document is written for each
                of its features.
        """
        features = self._ngram.get_features(string)
        # NOTE: RediSearch does not support storing lists in a field,
        # so we create a document for each feature. Downside is the high
        # redundancy of data and extra storage.
        for i, feature in enumerate(features):
            self._db.set(
                str(self._doc_id + i),
                {
                    'term': string,
                    'sz': len(features),
                    'ng': feature,
                },
            )
        # Advance past the IDs just used so the next insert does not reuse them
        self._doc_id += len(features)
예제 #9
0
def main():
    """Run an interactive search prompt over a small sample record index.

    Flushes the Redis instance, tries to create the ``recordIndex`` search
    index, loads the sample records through ``load_data`` and then reads
    queries from standard input until ``quit`` is entered.

    Supported queries::

        title <text>
        artist <text>
        year <from> [<to>]
        genre ...            (accepted, but no search is performed yet)

    Anything else, or a ``title`` / ``artist`` / ``year`` query without an
    argument, prints ``invalid query``.
    """
    print("hello!")

    r = redis.Redis(host=redis_host, port=redis_port)
    rs = redisearch.Client('recordIndex', redis_host, redis_port)

    # flush to get a fresh db
    # TODO - remove when dockerized
    r.flushall()

    record_collection = [{
        'title': 'Brothers and Sisters',
        'artist': 'Allman Brothers',
        'year': 1973,
        'genre': ['rock', 'southern rock', 'blues rock']
    }, {
        'title': 'Aja',
        'artist': 'Steely Dan',
        'year': 1977,
        'genre': ['rock', 'pop']
    }, {
        'title': 'Can\'t Buy a Thrill',
        'artist': 'Steely Dan',
        'year': 1972,
        'genre': ['rock', 'pop']
    }, {
        'title': 'Deguello',
        'artist': 'ZZ Top',
        'year': 1979,
        'genre': ['rock']
    }, {
        'title': 'American Beauty',
        'artist': 'Grateful Dead',
        'year': 1970,
        'genre': ['rock', 'psychedelic rock']
    }, {
        'title': 'Second Helping',
        'artist': 'Lynard Skynard',
        'year': 1974,
        'genre': ['rock', 'southern rock']
    }, {
        'title': 'The Joker',
        'artist': 'Steve Biller Band',
        'year': 1973,
        'genre': ['rock', 'blues rock']
    }, {
        'title': 'Book of Dreams',
        'artist': 'Steve Biller Band',
        'year': 1977,
        'genre': ['rock']
    }, {
        'title': 'Rumours',
        'artist': 'Fleetwood Mac',
        'year': 1977,
        'genre': ['rock', 'pop']
    }, {
        'title': 'Where We All Belong',
        'artist': 'Marshall Tucker Band',
        'year': 1974,
        'genre': ['rock', 'southern rock']
    }]

    try:
        rs.create_index((redisearch.TextField('title', sortable=True),
                         redisearch.TextField('artist', sortable=True),
                         redisearch.NumericField('year', sortable=True),
                         redisearch.TagField('genre', separator=',')))
    except Exception:
        print(f'Error creating index: {sys.exc_info()}')
    print(f'index info: {rs.info()}')

    load_data(rs, record_collection)

    while True:
        txt = input("enter a search term: ")
        if txt == "quit":
            break
        txt_arr = txt.split(' ', 1)
        print(f'searching {txt_arr}')
        field = txt_arr[0]
        # A field keyword on its own (e.g. just "title") has nothing to search
        # for; report it instead of failing with IndexError on txt_arr[1].
        has_term = len(txt_arr) == 2 and txt_arr[1].strip() != ''
        if field in ('title', 'artist', 'year') and not has_term:
            print("invalid query")
        elif field == 'title':
            res = rs.search(f'@title:{txt_arr[1]}')
            print(res)
        elif field == 'artist':
            res = rs.search(f'@artist:{txt_arr[1]}')
            print(res)
        elif field == 'year':
            # "year A" searches the single year A, "year A B" the range A..B
            year_args = txt_arr[1].split()
            former = year_args[0]
            latter = year_args[1] if len(year_args) == 2 else former
            res = rs.search(f'@year:[{former} {latter}]')
            print(res)
        elif field == 'genre':
            # Genre search is not implemented yet
            pass
        else:
            print("invalid query")