Esempio n. 1
0
    def save_prefix_index(self):
        """Build the autocomplete (prefix) index for this item.

        Adds the item id to the set keyed by the full title, then stores
        every proper prefix of the lowercased title (and, when pinyin
        matching is enabled, of its pinyin form) in the completion sorted
        set.  The full word is stored with a trailing ``*`` so complete()
        can distinguish terminal words from plain prefixes.
        """
        words = []
        words.append(self.title.lower())

        pipe = util.redis.pipeline()

        # Map the full title to this id for exact lookups.
        pipe.sadd(mk_sets_key(self.name, self.title), self.id)

        if util.pinyin_match:
            # Index the pinyin transliteration so latin-letter input can
            # complete CJK titles.
            # NOTE(review): assumes Pinyin.t returns a sequence of words;
            # if it returns a plain string, `words += pinyin` extends the
            # list with single characters — verify against Pinyin.t.
            pinyin = Pinyin.t(self.title.lower(), "")
            words += pinyin

            pipe.sadd(mk_sets_key(self.name, pinyin), self.id)

        key = mk_complete_key(self.name)
        for word in words:
            # Store every proper prefix (lengths 1..len-1).  The original
            # started the range at 0, which queued a useless empty-string
            # member ("") into the zset for every word.
            for i in range(1, len(word)):
                prefix = word[0:i]
                pipe.zadd(key, prefix, 0)

            # Terminal marker: the full word suffixed with "*".
            pipe.zadd(key, word + "*", 0)

        # Send all queued commands in one round trip.
        pipe.execute()
Esempio n. 2
0
    def save_prefix_index(self):
        """Register this item in the prefix (autocomplete) index."""
        index_words = [self.title.lower()]

        pipe = util.redis.pipeline()
        pipe.sadd(mk_sets_key(self.name, self.title), self.id)

        if util.pinyin_match:
            # Also index the pinyin form of the title.
            pinyin = Pinyin.t(self.title.lower(), "")
            index_words += pinyin
            pipe.sadd(mk_sets_key(self.name, pinyin), self.id)

        complete_key = mk_complete_key(self.name)
        for index_word in index_words:
            for end in range(len(index_word)):
                pipe.zadd(complete_key, index_word[:end], 0)
            # Terminal entry is the full word marked with "*".
            pipe.zadd(complete_key, index_word + "*", 0)

        # Flush every queued command at once.
        pipe.execute()
Esempio n. 3
0
    def remove(self, name, id, title):
        """Delete an item and all of its index entries.

        Removes the raw hash field, the per-word index sets, the score
        key, and the full-title set created by the prefix index.
        """
        pipe = util.redis.pipeline()

        # Drop the stored JSON payload.
        pipe.hdel(name, id)

        for word in self.split_words_for_index(title):
            key = mk_sets_key(name, word)
            pipe.srem(key, id)

        # The score key is per-item, not per-word: delete it once instead
        # of re-queueing the same DEL on every loop iteration.
        pipe.delete(mk_score_key(name, id))

        # Remove the id from the full-title set (the counterpart of
        # save_prefix_index's sadd).  The original called
        # srem(mk_sets_key(name, title, id)) — it passed `id` into the
        # key builder and gave srem no member to remove.
        pipe.srem(mk_sets_key(name, title), id)

        # commit
        pipe.execute()
Esempio n. 4
0
    def remove(self, name, id, title):
        """Remove an item and every index entry that points at it."""

        pipe = util.redis.pipeline()

        # Remove the stored JSON payload.
        pipe.hdel(name, id)
        words = self.split_words_for_index(title)

        for word in words:
            key = mk_sets_key(name, word)

            pipe.srem(key, id)

        # The score key does not depend on the word, so delete it once
        # rather than queueing an identical DEL per word.
        pipe.delete(mk_score_key(name, id))

        # Remove the id from the full-title prefix-index set.  The
        # original `srem(mk_sets_key(name, title, id))` passed `id` as a
        # key component and gave srem no member to remove.
        pipe.srem(mk_sets_key(name, title), id)

        # commit
        pipe.execute()
Esempio n. 5
0
    def save(self):
        """Persist this item and build its search indexes.

        Returns ``False`` when the title is empty or yields no index
        words; otherwise queues the raw JSON payload, the per-word id
        sets, the score key and the condition indexes in one pipeline,
        then builds the prefix index when enabled.
        """
        if not self.title:
            return False

        data = {
            'name': self.name,
            'id': self.id,
            'title': self.title
        }

        if self.exts:
            data.update(self.exts)

        pipe = util.redis.pipeline()

        # Store the raw data in a hash keyed by item id.  (The original
        # bound the pipeline's return value to an unused `res` local.)
        pipe.hset(self.name, self.id, json.dumps(data))

        # Build the sets index: one set per split word, holding ids,
        # used later by query()/complete().
        words = self.split_words_for_index(self.title)

        if not words:
            # Nothing to index.  The queued hset is silently discarded
            # because the pipeline is never executed.
            logging.info("no words")
            return False

        for word in words:
            key = mk_sets_key(self.name, word)

            # word index for item id
            pipe.sadd(key, self.id)

        # 'id' is a sentinel meaning "use the item id as its score".
        if self.score == 'id':
            self.score = self.id

        # score for search sort
        pipe.set(mk_score_key(self.name, self.id), self.score)

        # Register the id under each condition-field index.
        # NOTE(review): assumes every condition field is present in
        # `data` — a missing field raises KeyError before execute().
        for field in self.condition_fields:
            pipe.sadd(mk_condition_key(self.name, field, utf8(data[field])), self.id)

        # commit
        pipe.execute()

        if self.prefix_index_enable:
            self.save_prefix_index()
Esempio n. 6
0
    def save(self):
        """Write this item's payload plus all of its search indexes."""
        if not self.title:
            return False

        payload = {'name': self.name, 'id': self.id, 'title': self.title}
        if self.exts:
            payload.update(self.exts)

        pipe = util.redis.pipeline()

        # Raw record goes into a hash keyed by item id.
        pipe.hset(self.name, self.id, json.dumps(payload))

        # One set per index word, each holding item ids for lookup.
        index_words = self.split_words_for_index(self.title)
        if not index_words:
            logging.info("no words")
            return False

        for index_word in index_words:
            pipe.sadd(mk_sets_key(self.name, index_word), self.id)

        # 'id' means: use the item id itself as the sort score.
        if self.score == 'id':
            self.score = self.id
        pipe.set(mk_score_key(self.name, self.id), self.score)

        # Register the id under every condition-field index.
        for field in self.condition_fields:
            pipe.sadd(mk_condition_key(self.name, field, utf8(payload[field])),
                      self.id)

        pipe.execute()

        if self.prefix_index_enable:
            self.save_prefix_index()
Esempio n. 7
0
def complete(name, keyword, limit=10, conditions=None):
    """complete: prefix match search (autocomplete).

    Walks the completion zset starting at the rank of ``keyword``,
    collecting entries that end in ``*`` (terminal words written by
    save_prefix_index), optionally intersects with condition indexes,
    and returns up to ``limit`` matching items sorted by score.

    :param name: index name
    :param keyword: prefix to complete
    :param limit: max match count
    :param conditions: optional {field: value} filters
    """
    conditions = conditions if isinstance(conditions, dict) and conditions else {}

    if not keyword and not conditions:
        logging.debug("no word and conditions")
        return []

    keyword = utf8(keyword.strip())
    prefix_matchs = []

    # This is not random, try to get replies < MTU size
    rangelen = util.complete_max_length
    prefix = keyword.lower()
    key = mk_complete_key(name)

    start = util.redis.zrank(key, prefix)

    # zrank returns the rank (0 for the first member), so compare with
    # None explicitly — `if start:` silently dropped rank-0 prefixes.
    if start is not None:
        count = limit
        while len(prefix_matchs) <= count:
            # Page through the zset directly.  The original fetched one
            # big range and then re-sliced it by *absolute* rank
            # (`entries[start:max_range]`), which skipped entries.
            entries = util.redis.zrange(key, start, start + rangelen - 1)
            start += rangelen
            if not entries:
                break
            # entries are in lexical order, so the first entry that no
            # longer shares the prefix ends the whole scan
            for entry in entries:
                minlen = min(len(entry), len(prefix))

                # this entry breaks consistency with the prefix
                if entry[0:minlen] != prefix[0:minlen]:
                    count = len(prefix_matchs)
                    break

                # a trailing "*" marks a complete word
                if entry[-1] == "*" and len(prefix_matchs) != count:
                    match = entry[:-1]
                    if match not in prefix_matchs:
                        prefix_matchs.append(match)

    # Build the per-word set keys for the matched words.
    words = [mk_sets_key(name, word) for word in prefix_matchs]

    # Unlike query(), condition keys are kept separate from `words`:
    # word sets are unioned, condition sets are intersected afterwards.
    condition_keys = [mk_condition_key(name, c, utf8(conditions[c]))
                      for c in conditions]
    # Union the matched word sets into a scratch key.
    temp_store_key = "tmpsunionstore:%s" % "+".join(words)
    if len(words) == 0:
        logging.info("no words")
    elif len(words) > 1:
        if not util.redis.exists(temp_store_key):
            # Store the union of all word sets in the scratch key...
            util.redis.sunionstore(temp_store_key, words)
            # ...which expires automatically after one day.
            util.redis.expire(temp_store_key, 86400)
    else:
        temp_store_key = words[0]

    # Apply the condition filters, if any.
    if condition_keys:
        if not words:
            # Append the key itself; the original `condition_keys +=
            # temp_store_key` extended the list with single characters
            # of the key string.
            condition_keys.append(temp_store_key)
        temp_store_key = "tmpsinterstore:%s" % "+".join(condition_keys)
        if not util.redis.exists(temp_store_key):
            util.redis.sinterstore(temp_store_key, condition_keys)
            util.redis.expire(temp_store_key, 86400)

    # Fetch up to `limit` ids ordered (desc) by their score keys.
    ids = util.redis.sort(temp_store_key,
                          start=0,
                          num=limit,
                          by=mk_score_key(name, "*"),
                          desc=True)
    if not ids:
        return []
    return hmget(name, ids)
Esempio n. 8
0
def query(name, text, offset=0, limit=10, sort_field='id', conditions=None):
    """Full-text search over the per-word id sets, with optional pinyin
    matching and condition filters."""
    conditions = conditions if isinstance(conditions, dict) and conditions else {}

    started = time.time()
    result = []

    # Bail out immediately when there is neither text nor any condition.
    if not text.strip() and not conditions:
        return result

    text = utf8(text.strip())
    tokens = split_words(text)

    words = [mk_sets_key(name, token) for token in tokens]

    condition_keys = []
    if conditions:
        condition_keys = [mk_condition_key(name, c, utf8(conditions[c]))
                          for c in conditions]
        # Condition keys join the sinterstore intersection with the words.
        words += condition_keys

    if not words:
        return result

    temp_store_key = "tmpinterstore:%s" % "+".join(words)

    if len(words) > 1:
        if not util.redis.exists(temp_store_key):
            # Intersect all word/condition sets into a scratch key...
            util.redis.sinterstore(temp_store_key, words)
            # ...that expires after one day.
            util.redis.expire(temp_store_key, 86400)
        # Pinyin matching, when enabled.
        if util.pinyin_match:
            pinyin_tokens = split_pinyin(text)

            pinyin_words = [mk_sets_key(name, t) for t in pinyin_tokens]
            pinyin_words += condition_keys
            temp_sunion_key = "tmpsunionstore:%s" % "+".join(words)
            temp_pinyin_store_key = "tmpinterstore:%s" % "+".join(pinyin_words)
            # Intersection of the pinyin word sets.
            util.redis.sinterstore(temp_pinyin_store_key, pinyin_words)
            # Union of the CJK result and the pinyin result.
            util.redis.sunionstore(temp_sunion_key, [temp_store_key, temp_pinyin_store_key])
            # Both scratch keys expire after one day.
            util.redis.expire(temp_pinyin_store_key, 86400)
            util.redis.expire(temp_sunion_key, 86400)
            temp_store_key = temp_sunion_key
    else:
        temp_store_key = words[0]

    # Pull out up to `limit` ids ordered (desc) by their score keys.
    ids = util.redis.sort(temp_store_key,
                          start=offset,
                          num=limit,
                          by=mk_score_key(name, "*"),
                          desc=True)
    result = hmget(name, ids, sort_field=sort_field)
    logging.debug("{}:\"{}\" | Time spend:{}s".format(name, text, time.time()-started))
    return result
Esempio n. 9
0
def complete(name, keyword, limit=10, conditions=None):
    """Autocomplete: return up to `limit` items whose indexed word
    starts with `keyword`, optionally filtered by `conditions`.

    :param name: index name
    :param keyword: prefix to complete
    :param limit: maximum number of results
    :param conditions: optional {field: value} filters
    """

    conditions = conditions if isinstance(conditions, dict) and conditions else {}

    if not keyword and not conditions:
        logging.debug("no word and conditions")
        return []

    keyword = utf8(keyword.strip())
    prefix_matchs = []

    # This is not random, try to get replies < MTU size
    rangelen = util.complete_max_length
    prefix = keyword.lower()
    key = mk_complete_key(name)

    start = util.redis.zrank(key, prefix)

    # zrank yields 0 for the very first member; `if start:` would have
    # treated that legitimate rank as "not found".
    if start is not None:
        count = limit
        while len(prefix_matchs) <= count:
            # Fetch the next page straight from redis.  Re-slicing the
            # first reply by absolute rank (the original
            # `entries[start:max_range]`) skipped entries.
            entries = util.redis.zrange(key, start, start + rangelen - 1)
            start += rangelen
            if not entries:
                break

            for entry in entries:
                minlen = min(len(entry), len(prefix))

                # Entries are in lexical order: the first mismatch with
                # the prefix ends the whole scan.
                if entry[0:minlen] != prefix[0:minlen]:
                    count = len(prefix_matchs)
                    break

                # A trailing "*" marks a full word.
                if entry[-1] == "*" and len(prefix_matchs) != count:
                    match = entry[:-1]
                    if match not in prefix_matchs:
                        prefix_matchs.append(match)

    # Build the per-word set keys for the matched words.
    words = []
    for word in prefix_matchs:
        words.append(mk_sets_key(name, word))

    # Unlike query(), conditions are NOT merged into `words` here: word
    # sets are unioned, condition sets must be intersected afterwards.
    condition_keys = []
    if conditions:
        for c in conditions:
            condition_keys.append(mk_condition_key(name, c, utf8(conditions[c])))

    # Union the matched word sets.
    temp_store_key = "tmpsunionstore:%s" % "+".join(words)
    if len(words) == 0:
        logging.info("no words")
    elif len(words) > 1:
        if not util.redis.exists(temp_store_key):
            # Store the union of all word sets in a scratch key...
            util.redis.sunionstore(temp_store_key, words)

            # ...which expires after one day.
            util.redis.expire(temp_store_key, 86400)
    else:
        temp_store_key = words[0]

    # Apply the condition filters, if any.
    if condition_keys:
        if not words:
            # Append the key itself; the original `condition_keys +=
            # temp_store_key` extended the list with the key string's
            # individual characters.
            condition_keys.append(temp_store_key)

        temp_store_key = "tmpsinterstore:%s" % "+".join(condition_keys)
        if not util.redis.exists(temp_store_key):
            util.redis.sinterstore(temp_store_key, condition_keys)
            util.redis.expire(temp_store_key, 86400)

    # Fetch up to `limit` ids ordered (desc) by their score keys.
    ids = util.redis.sort(temp_store_key,
                    start = 0,
                    num = limit,
                    by = mk_score_key(name, "*"),
                    desc = True)
    if not ids:
        return []

    return util.hmget(name, ids)
Esempio n. 10
0
def query(name, text, offset=0, limit=10, sort_field='id', conditions=None):
    """Search the `name` index for `text`.

    Intersects the per-word id sets produced at save time (plus any
    condition indexes), optionally unions in pinyin matches, then sorts
    the resulting ids by their score keys and fetches the records.

    :param name: index (hash) name
    :param text: query text; split into words for the intersection
    :param offset: start offset into the sorted id list
    :param limit: maximum number of ids fetched
    :param sort_field: passed through to util.hmget for result ordering
    :param conditions: optional {field: value} filters
    """

    conditions = conditions if isinstance(conditions, dict) and conditions else {}

    tm = time.time()
    result = []

    # Return [] right away when there is neither text nor conditions.
    if not text.strip() and not conditions:
        return result

    text = utf8(text.strip())
    splited_words = split_words(text)

    words = []
    for word in splited_words:
        words.append(mk_sets_key(name, word))

    condition_keys = []
    if conditions:
        for c in conditions:
            condition_keys.append(mk_condition_key(name, c, utf8(conditions[c])))

        # Add the condition keys to the word keys so they participate in
        # the sinterstore intersection below.
        words += condition_keys

    if not words:
        return result

    temp_store_key = "tmpinterstore:%s" % "+".join(words)

    if len(words) > 1:
        if not util.redis.exists(temp_store_key):
            # Intersect all the word sets and store into a scratch key.
            util.redis.sinterstore(temp_store_key, words)

            # Let the scratch key expire automatically after one day.
            util.redis.expire(temp_store_key, 86400)

        # Pinyin search (only reached for multi-key queries).
        if util.pinyin_match:
            splited_pinyin_words = split_pinyin(text)

            pinyin_words = []
            for w in splited_pinyin_words:
                pinyin_words.append(mk_sets_key(name, w))

            pinyin_words += condition_keys

            temp_sunion_key = "tmpsunionstore:%s" % "+".join(words)
            temp_pinyin_store_key = "tmpinterstore:%s" % "+".join(pinyin_words)

            # Intersection of the pinyin word sets.
            util.redis.sinterstore(temp_pinyin_store_key, pinyin_words)

            # Union of the CJK result and the pinyin result.
            util.redis.sunionstore(temp_sunion_key, [temp_store_key, temp_pinyin_store_key])

            # Both scratch keys expire after one day.
            util.redis.expire(temp_pinyin_store_key, 86400)
            util.redis.expire(temp_sunion_key, 86400)

            temp_store_key = temp_sunion_key
    else:
        temp_store_key = words[0]

    # Fetch up to `limit` ids ordered (desc) by their score keys.
    ids = util.redis.sort(temp_store_key,
                    start = offset,
                    num = limit,
                    by = mk_score_key(name, "*"),
                    desc = True)

    result = util.hmget(name, ids, sort_field=sort_field)
    logging.debug("%s:\"%s\" | Time spend:%ss" % (name, text, time.time()-tm))
    return result