Esempio n. 1
0
def initialize():
    """Create the RediSearch index on first run (when no apps are indexed yet)."""
    apps = redis_conn.smembers("apps:index")
    if not apps:
        try:
            redis_search.create_index([
                redisearch.TextField("appid"),
                redisearch.TextField("name"),
                redisearch.TextField("summary"),
                # Down-weight long descriptions so appid/name/summary hits rank higher
                redisearch.TextField("description", 0.2),
                redisearch.TextField("keywords"),
            ])
        except Exception:
            # Best-effort: the index most likely already exists.  Narrowed
            # from a bare `except:`, which would also swallow SystemExit
            # and KeyboardInterrupt.
            pass
Esempio n. 2
0
def _get_custom_field_from_input(custom_index_input: "custom_index") -> redisearch.client.Field:
    """Translate a custom-index description dict into a redisearch Field.

    Supports "text", "tag" and "number" field types; optional settings are
    read from the dict's "options" key.  Raises RuntimeError for any other
    type.
    """
    field_type = custom_index_input["type"]
    name = custom_index_input["field_name"]
    opts = custom_index_input.get("options") or {}

    if field_type == "text":
        weight = 1.0
        raw_weight = opts.get("weight")
        # Only accept numeric weights within [0, 1]; anything else keeps 1.0.
        if isinstance(raw_weight, (int, float)) and 0 <= raw_weight <= 1:
            weight = float(raw_weight)
        return redisearch.TextField(
            name,
            weight=weight,
            sortable=bool(opts.get("sortable")),
            no_stem=bool(opts.get("no_stem")),
            no_index=bool(opts.get("no_index")),
        )

    if field_type == "tag":
        return redisearch.TagField(
            name,
            separator=opts.get("separator") or ",",
            no_index=bool(opts.get("no_index")),
        )

    if field_type == "number":
        return redisearch.NumericField(
            name,
            sortable=bool(opts.get("sortable")),
            no_index=bool(opts.get("no_index")),
        )

    raise RuntimeError(f"Index type {field_type} is not supported")
Esempio n. 3
0
    def __init__(
        self,
        index: str = 'test',
        *,
        fields: Iterable[Any] = (redisearch.TextField('text'),),
        host: str = 'localhost',
        port: int = 6379,
        access_mode: str = 'c',
        use_pipeline: bool = False,
        chunk_size: int = 10000,
        connect: bool = True,
        max_connect_attempts: int = 3,
        **conn_info,
    ):
        """Record connection settings; connect now or lazily per `connect`."""
        self._conn = None
        self._conn_pipe = None
        self._host = host
        self._port = port
        self._index = index
        self._fields = fields
        self._access_mode = access_mode
        self._use_pipeline = use_pipeline
        self._chunk_size = chunk_size
        self._max_connect_attempts = max_connect_attempts
        self._conn_info = conn_info
        # NOTE: RediSearch index can only be created in database index 0.
        self._n = 0

        # Open the connection eagerly, or only prepare connection state.
        (self.connect if connect else self._pre_connect)()
Esempio n. 4
0
def get_redisearch_cli(chat_id):
    """Return a RediSearch client for the chat's index, creating the index if needed."""
    idx = get_index_name(chat_id)
    # TODO supports for redis authentication & cluster
    cli = redisearch.Client(idx, host=REDIS_HOST, port=REDIS_PORT)
    logger.debug('get client with idx %s for chat %s', idx, chat_id)
    try:
        # cli.drop_index()  # TODO dedicate API for dropping index
        cli.create_index([
            redisearch.TextField('msg', weight=5.),
            redisearch.TextField('msg_id', weight=0.),
            redisearch.TextField('user', weight=0.),
            redisearch.TextField('ts', weight=0.),
        ])
    except redis.exceptions.ResponseError as e:
        # BUG FIX: redis-py exceptions have no `.message` attribute on
        # Python 3, so `e.message` raised AttributeError instead of
        # performing the "already exists" check.  Compare the string form.
        if str(e) != 'Index already exists':
            raise
    return cli
Esempio n. 5
0
def startup_event():
    """Add the flathub remote (best-effort) and create the search index on first run."""
    remote_add_cmd = [
        "flatpak",
        "--user",
        "remote-add",
        "--if-not-exists",
        "flathub",
        "https://flathub.org/repo/flathub.flatpakrepo",
    ]
    # Best-effort: output is discarded and a non-zero exit status (e.g. the
    # remote is already configured) is deliberately ignored.
    subprocess.run(remote_add_cmd,
                   stdout=subprocess.DEVNULL,
                   stderr=subprocess.DEVNULL)

    apps = redis_conn.smembers("apps:index")
    if not apps:
        try:
            redis_search.create_index([
                redisearch.TextField("name"),
                redisearch.TextField("summary"),
                # Down-weight long descriptions so name/summary hits rank higher
                redisearch.TextField("description", 0.2),
                redisearch.TextField("keywords"),
            ])
        except Exception:
            # Best-effort: the index most likely already exists.  Narrowed
            # from a bare `except:`, which would also swallow SystemExit
            # and KeyboardInterrupt.
            pass
Esempio n. 6
0
def force_create_transaction_index(
        index: str,
        custom_indexes: Optional[Iterable["custom_index"]] = None) -> None:
    """Create (and overwrite if necessary) index for a transaction type with optional custom_indexes"""
    # Drop any existing index of the same name before recreating it.
    delete_index(index)
    client = _get_redisearch_index_client(index)
    # Standard fields every transaction index carries.
    fields = [
        redisearch.TextField("tag"),
        redisearch.NumericField("timestamp", sortable=True),
        redisearch.NumericField("block_id", sortable=True),
    ]
    # Append caller-supplied custom fields, if any were given.
    fields.extend(
        _get_custom_field_from_input(ci) for ci in (custom_indexes or []))
    client.create_index(fields)
def savetoredis(req_id, colnames, datavalues, expired_time):
    """Index `datavalues` rows under `req_id` in RediSearch and schedule expiry.

    Columns whose name contains a numeric marker (score/diff/row/z_score/
    p_value) become sortable numeric fields; all others become sortable
    text fields.  Expiry is delegated to the async `drop_index` task.
    """
    db.hmset("%s:cols" % req_id, {'cols': colnames})
    client = redisearch.Client(req_id)
    numeric_markers = ("score", "diff", "row", "z_score", "p_value")
    # Idiom: `any()` over a marker tuple replaces the chained `or`s, and a
    # comprehension replaces the manual append loop.
    indexes = [
        redisearch.NumericField(col, sortable=True)
        if any(marker in col for marker in numeric_markers)
        else redisearch.TextField(col, sortable=True)
        for col in colnames
    ]
    client.create_index(indexes)
    # Idiom: enumerate the rows rather than indexing with range(len(...)).
    for i, row in enumerate(datavalues):
        fields = {col: row[col] for col in colnames}
        client.add_document("%s_%d" % (req_id, i), **fields)
    # ---- set expiry for columns and documents ----
    #db.expire("%s:cols"%req_id,expired_time) let's comment for now and see how it goes
    drop_index.apply_async((req_id, ), countdown=expired_time)
Esempio n. 8
0
 def __init__(self):
     """Connect to Redis and ensure the 'product_name' search index exists."""
     # setup redis clients
     self.r = redis.Redis(host=redis_host, port=redis_port)
     self.rs = redisearch.Client('product_name',
                                 host=redis_host,
                                 port=redis_port)
     try:
         self.rs.create_index(
             (redisearch.NumericField('id'), redisearch.TextField('name'),
              redisearch.TextField('description'),
              redisearch.TextField('vendor'),
              redisearch.NumericField('price'),
              redisearch.TextField('currency'),
              redisearch.TextField('category'),
              redisearch.TextField('images')))
     except Exception as e:
         # Most likely the index already exists; include the actual error
         # so real failures are distinguishable (the old message was a
         # constant string, hiding the cause).
         print(f'error creating index: {e}')
     print(f'index info: {self.rs.info()}')
Esempio n. 9
0
class RediSearch(BaseMatcher):
    """RediSearch.

    Args:
        alpha (float): Similarity threshold in range (0,1].

        similarity (str, BaseSimilarity): Similarity measure instance or name.

        db (Dict[str, Any]): Options passed directly to
            'RediSearchDatabase()'.
            index (str): RediSearch index name for storage.

    Kwargs: Options forwarded to 'BaseMatcher()'.
    """

    NAME = 'redisearch-match'

    _FIELDS = (redisearch.TextField('term'), )

    def __init__(
        self,
        *,
        alpha: float = 0.7,
        similarity: str = None,
        # NOTE: Hijack 'db' parameter from 'BaseMatcher'
        db: Dict[str, Any] = None,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self._alpha = None
        self._similarity = None

        self.alpha = alpha
        self.similarity = similarity

        if db is None:
            db = {}

        self._db = RediSearchDatabase(
            index=db.pop('index', 'facet'),
            fields=type(self)._FIELDS,
            **db,
        )
        # NOTE: Use document count as document IDs
        self._doc_id = len(self._db)

    @property
    def alpha(self):
        return self._alpha

    # BUG FIX: this setter was defined twice; the first definition (a plain
    # assignment with no validation) was dead code immediately shadowed by
    # the validating one, so it has been removed.
    @alpha.setter
    def alpha(self, alpha: float):
        self._alpha = get_alpha(alpha)

    @property
    def similarity(self):
        return self._similarity

    @similarity.setter
    def similarity(self, similarity):
        # NOTE: Clear cache database if similarity measure changes because
        # results may differ.
        if self._cache_db is not None and self._cache_db.ping():
            self._cache_db.clear()
        self._similarity = get_similarity(similarity)

    def insert(self, string: str):
        """Insert string into database."""
        self._db.set(str(self._doc_id), {'term': string})
        self._doc_id += 1

    def search(
        self,
        string: str,
        *,
        alpha: float = None,
        similarity: str = None,
        rank: bool = True,
    ) -> Union[List[Tuple[str, float]], List[str]]:
        """Approximate dictionary matching.

        Args:
            alpha (float): Similarity threshold.

            similarity (str): Instance of similarity measure or
                similarity name.
        """
        alpha = (self._alpha if alpha is None else get_alpha(alpha))
        similarity = (self._similarity
                      if similarity is None else get_similarity(similarity))

        # NOTE: Cached data assumes approximate string matching parameters
        # (similariy measure) are the same with the exception of 'alpha'
        # because results may differ. Therefore, do not use cache database
        # if similarity measure from argument differs from internal
        # similarity measure.
        use_cache = (similarity == self._similarity
                     and self._cache_db is not None)

        if use_cache:
            strings_and_similarities = self._cache_db.get(string)
            if strings_and_similarities is not None:
                return strings_and_similarities

        candidate_strings = [
            document.term for document in self._db.get(string).docs
        ]

        # BUG FIX: score with the locally resolved 'similarity' (which
        # honors the per-call override) instead of 'self._similarity',
        # which silently ignored the 'similarity' argument.
        similarities = [
            similarity(string, candidate_string)
            for candidate_string in candidate_strings
        ]
        strings_and_similarities = list(
            filter(lambda ss: ss[1] >= alpha,
                   zip(candidate_strings, similarities)))
        if rank:
            strings_and_similarities.sort(key=lambda ss: ss[1], reverse=True)

        # NOTE: Need a way to limit database and only cache heavy hitters.
        if use_cache:
            self._cache_db.set(string, strings_and_similarities)

        return strings_and_similarities
Esempio n. 10
0
class RediSearchSimstring(BaseSimstring):
    """RediSearch implementation of Simstring algorithm.

    Args:
        db (Dict[str, Any]): Options passed directly to
            'RediSearchDatabase()'.

    Kwargs: Options forwarded to 'BaseSimstring()'.
    """

    NAME = 'redisearch-simstring'

    _FIELDS = (
        redisearch.TextField('term', no_stem=True),
        redisearch.TextField('ng', no_stem=False),
        redisearch.NumericField('sz', sortable=True),
    )

    def __init__(
        self,
        *,
        # NOTE: Hijack 'db' parameter from 'BaseMatcher'
        db: Dict[str, Any] = None,
        **kwargs,
    ):
        super().__init__(**kwargs)

        db_options = {} if db is None else db
        self._db = RediSearchDatabase(
            index=db_options.pop('index', 'facet'),
            fields=type(self)._FIELDS,
            **db_options,
        )
        # Document IDs are derived from the current document count.
        self._doc_id = len(self._db)

    def get_strings(self, size: int, feature: str) -> List[str]:
        """Get strings corresponding to feature size and query feature."""
        # Restrict the match to the 'ng' field and to documents whose
        # feature count equals 'size' exactly.
        size_filter = redisearch.NumericFilter('sz', size, size)
        query = (redisearch.Query(feature)
                 .verbatim()
                 .limit_fields('ng')
                 .add_filter(size_filter)
                 .return_fields('term'))
        return [doc.term for doc in self._db.get(query).docs]

    def insert(self, string: str):
        """Insert string into database."""
        features = self._ngram.get_features(string)
        total = len(features)
        # RediSearch fields cannot store lists, so each feature becomes its
        # own document — redundant data, but required by the storage model.
        for offset, feature in enumerate(features):
            self._db.set(
                str(self._doc_id + offset),
                {'term': string, 'sz': total, 'ng': feature},
            )
        self._doc_id += total
Esempio n. 11
0
def main():
    """Load a demo record collection into RediSearch and run an interactive search loop."""
    print("hello!")

    r = redis.Redis(host=redis_host, port=redis_port)
    rs = redisearch.Client('recordIndex', redis_host, redis_port)

    # flush to get a fresh db
    # TODO - remove when dockerized
    r.flushall()

    # Demo dataset: one dict per album.
    # NOTE(review): 'Steve Biller Band' is likely a typo for 'Steve Miller
    # Band' — confirm before correcting, since it is runtime data.
    record_collection = [{
        'title': 'Brothers and Sisters',
        'artist': 'Allman Brothers',
        'year': 1973,
        'genre': ['rock', 'southern rock', 'blues rock']
    }, {
        'title': 'Aja',
        'artist': 'Steely Dan',
        'year': 1977,
        'genre': ['rock', 'pop']
    }, {
        'title': 'Can\'t Buy a Thrill',
        'artist': 'Steely Dan',
        'year': 1972,
        'genre': ['rock', 'pop']
    }, {
        'title': 'Deguello',
        'artist': 'ZZ Top',
        'year': 1979,
        'genre': ['rock']
    }, {
        'title': 'American Beauty',
        'artist': 'Grateful Dead',
        'year': 1970,
        'genre': ['rock', 'psychedelic rock']
    }, {
        'title': 'Second Helping',
        'artist': 'Lynard Skynard',
        'year': 1974,
        'genre': ['rock', 'southern rock']
    }, {
        'title': 'The Joker',
        'artist': 'Steve Biller Band',
        'year': 1973,
        'genre': ['rock', 'blues rock']
    }, {
        'title': 'Book of Dreams',
        'artist': 'Steve Biller Band',
        'year': 1977,
        'genre': ['rock']
    }, {
        'title': 'Rumours',
        'artist': 'Fleetwood Mac',
        'year': 1977,
        'genre': ['rock', 'pop']
    }, {
        'title': 'Where We All Belong',
        'artist': 'Marshall Tucker Band',
        'year': 1974,
        'genre': ['rock', 'southern rock']
    }]

    # Create the index; tolerate "already exists" style errors and report them.
    try:
        rs.create_index((redisearch.TextField('title', sortable=True),
                         redisearch.TextField('artist', sortable=True),
                         redisearch.NumericField('year', sortable=True),
                         redisearch.TagField('genre', separator=',')))
    except Exception:
        print(f'Error creating index: {sys.exc_info()}')
    print(f'index info: {rs.info()}')

    run = True

    load_data(rs, record_collection)

    # Interactive loop: input is "<field> <terms>", e.g. "title aja",
    # "artist dan", "year 1973" or "year 1970 1979".  "quit" exits.
    while run:
        txt = input("enter a search term: ")
        if (txt == "quit"):
            run = False
            break
        # Split into field name and the remainder of the query.
        txt_arr = txt.split(' ', 1)
        print(f'searching {txt_arr}')
        if (txt_arr[0] == 'title'):
            res = rs.search(f'@title:{txt_arr[1]}')
            print(res)
        elif (txt_arr[0] == 'artist'):
            res = rs.search(f'@artist:{txt_arr[1]}')
            print(res)
        elif (txt_arr[0] == 'year'):
            # One year searches [y y]; two years search the range [y1 y2].
            full_txt_arr = txt.split(' ')
            former = full_txt_arr[1]
            latter = full_txt_arr[1]
            if (len(full_txt_arr) == 3):
                latter = full_txt_arr[2]
            res = rs.search(f'@year:[{former} {latter}]')
            print(res)
        elif (txt_arr[0] == 'genre'):
            # Genre (tag-field) search not implemented yet.
            pass
        else:
            print("invalid query")