Example #1
0
    def save_prefix_index(self):
        """Index this item's title for prefix (autocomplete) lookup.

        All writes are queued on a single Redis pipeline and sent once:

        * ``self.id`` is added to the set keyed by the title and, when
          ``util.pinyin_match`` is enabled, to the set keyed by the title's
          pinyin transliteration.
        * For each indexed word (the lower-cased title plus the optional
          pinyin form), every non-empty prefix up to and including the
          whole word is added to the sorted set named by
          ``mk_complete_key(self.name)`` with score 0, followed by a
          ``word + "*"`` entry.
        """
        words = [self.title.lower()]

        pipe = util.redis.pipeline()
        pipe.sadd(mk_sets_key(self.name, self.title), self.id)

        if util.pinyin_match:
            pinyin = Pinyin.t(self.title.lower(), "")
            # Presumably Pinyin.t returns one string here (it is used as a
            # single word in the key below) -- TODO confirm. A string must
            # be appended whole: ``words += pinyin`` would add each of its
            # characters as a separate word.
            if isinstance(pinyin, (list, tuple)):
                words.extend(pinyin)
            else:
                words.append(pinyin)

            pipe.sadd(mk_sets_key(self.name, pinyin), self.id)

        key = mk_complete_key(self.name)
        for word in words:
            # Prefix lengths run 1..len(word): the empty string is not a
            # useful prefix, and the full word must be present so that a
            # lookup of the complete title finds a rank.
            for end in range(1, len(word) + 1):
                pipe.zadd(key, word[:end], 0)

            # Terminator entry marking a complete word.
            pipe.zadd(key, word + "*", 0)

        # commit
        pipe.execute()
Example #2
0
    def save_prefix_index(self):
        """Write the title's prefix index and id sets to Redis.

        The commands are batched on one pipeline and executed at the end.
        ``self.id`` goes into the set for the title (and, if
        ``util.pinyin_match`` is on, the set for its pinyin form). Each
        indexed word contributes all of its non-empty prefixes, the full
        word included, plus a ``word + "*"`` entry, to the sorted set
        named by ``mk_complete_key(self.name)``; every member has score 0.
        """
        lowered_title = self.title.lower()
        words = [lowered_title]

        pipe = util.redis.pipeline()
        pipe.sadd(mk_sets_key(self.name, self.title), self.id)

        if util.pinyin_match:
            pinyin = Pinyin.t(lowered_title, "")
            # NOTE(review): Pinyin.t presumably returns a single string
            # (it is passed to mk_sets_key as one word) -- confirm. Using
            # ``+=`` with a string would split it into characters, so it
            # is appended as one word; a list/tuple result is extended.
            if isinstance(pinyin, (list, tuple)):
                words.extend(pinyin)
            else:
                words.append(pinyin)

            pipe.sadd(mk_sets_key(self.name, pinyin), self.id)

        key = mk_complete_key(self.name)
        for word in words:
            # Lengths 1..len(word) inclusive: no empty-string member, and
            # the whole word is itself indexed as a prefix.
            for length in range(1, len(word) + 1):
                pipe.zadd(key, word[:length], 0)

            # "*" suffix marks the entry as a complete word.
            pipe.zadd(key, word + "*", 0)

        # commit
        pipe.execute()
Example #3
0
def complete(name, keyword, limit=10, conditions=None):
    """Prefix-match (autocomplete) search.

    Args:
        name: Index name used to build every Redis key.
        keyword: Prefix to complete. May be empty or None when
            ``conditions`` is given (conditions-only search).
        limit: Maximum number of results.
        conditions: Optional dict of field -> value. The ids matching the
            keyword (a union over all completed words) are intersected
            with the set of every condition. Anything that is not a
            non-empty dict is treated as "no conditions".

    Returns:
        ``[]`` when there is nothing to search for or nothing matches,
        otherwise ``hmget(name, ids)`` for the matching ids, sorted by
        score in descending order.
    """
    conditions = conditions if isinstance(conditions, dict) and conditions else {}

    if not keyword and not conditions:
        logging.debug("no word and conditions")
        return []

    # keyword can legitimately be None/"" for a conditions-only search.
    keyword = utf8((keyword or "").strip())
    prefix_matchs = []

    # This is not random, try to get replies < MTU size
    rangelen = util.complete_max_length
    prefix = keyword.lower()
    key = mk_complete_key(name)

    # An empty prefix would match every entry, so skip the lookup entirely.
    start = util.redis.zrank(key, prefix) if prefix else None

    # zrank returns None when the member is absent; rank 0 is a valid hit
    # and must not be treated as "not found".
    if start is not None:
        count = limit
        max_range = start + (rangelen * limit) - 1
        entries = util.redis.zrange(key, start, max_range)
        while len(prefix_matchs) <= count:
            start += rangelen
            if not entries:
                break
            # Entries are ordered, so the first entry that stops sharing
            # the prefix ends the scan.
            for entry in entries:
                minlen = min(len(entry), len(prefix))

                # This entry no longer agrees with the prefix.
                if entry[0:minlen] != prefix[0:minlen]:
                    count = len(prefix_matchs)
                    break

                # Entries ending in "*" are complete words.
                if entry[-1] == "*" and len(prefix_matchs) != count:
                    match = entry[:-1]
                    if match not in prefix_matchs:
                        prefix_matchs.append(match)
            # NOTE(review): this slices the already-fetched list with
            # absolute ranks rather than fetching the next page; kept
            # as-is to preserve existing behavior -- confirm intent.
            entries = entries[start:max_range]

    # Set keys of the completed words.
    words = [mk_sets_key(name, word) for word in prefix_matchs]

    # Condition keys are kept separate from ``words``: the words are
    # combined with a union, while conditions must be intersected with
    # that union.
    condition_keys = [mk_condition_key(name, c, utf8(conditions[c]))
                      for c in conditions]

    if not words:
        logging.info("no words")
        if not condition_keys:
            # Nothing to union and nothing to intersect.
            return []

    # Search by words.
    temp_store_key = None
    if len(words) > 1:
        temp_store_key = "tmpsunionstore:%s" % "+".join(words)
        if not util.redis.exists(temp_store_key):
            # Store the union of all word sets in a temporary key ...
            util.redis.sunionstore(temp_store_key, words)
            # ... which expires automatically after one day.
            util.redis.expire(temp_store_key, 86400)
    elif words:
        temp_store_key = words[0]

    # With conditions, intersect them with the word result (if any).
    if condition_keys:
        if words:
            # Append the key as ONE element; ``+=`` with a string would
            # add its individual characters as bogus key names.
            condition_keys.append(temp_store_key)
        temp_store_key = "tmpsinterstore:%s" % "+".join(condition_keys)
        if not util.redis.exists(temp_store_key):
            util.redis.sinterstore(temp_store_key, condition_keys)
            util.redis.expire(temp_store_key, 86400)

    ids = util.redis.sort(temp_store_key,
                          start=0,
                          num=limit,
                          by=mk_score_key(name, "*"),
                          desc=True)
    if not ids:
        return []
    return hmget(name, ids)
Example #4
0
def complete(name, keyword, limit=10, conditions=None):
    """Return up to ``limit`` items whose indexed words start with ``keyword``.

    Args:
        name: Index name used to build every Redis key.
        keyword: Prefix to complete; may be empty or None if
            ``conditions`` is supplied.
        limit: Maximum number of results.
        conditions: Optional dict of field -> value. Word matches are
            unioned together and the result is intersected with each
            condition set. Values that are not a non-empty dict are
            ignored.

    Returns:
        ``[]`` if there is nothing to search for or no id matches,
        otherwise ``util.hmget(name, ids)`` for the ids sorted by score,
        highest first.
    """
    conditions = conditions if isinstance(conditions, dict) and conditions else {}

    if not keyword and not conditions:
        logging.debug("no word and conditions")
        return []

    # A conditions-only call may pass None/"" as the keyword.
    keyword = utf8((keyword or "").strip())
    prefix_matchs = []

    # This is not random, try to get replies < MTU size
    rangelen = util.complete_max_length
    prefix = keyword.lower()
    key = mk_complete_key(name)

    # Skip the rank lookup for an empty prefix (it would match anything).
    start = util.redis.zrank(key, prefix) if prefix else None

    # Compare against None explicitly: a rank of 0 is a real match.
    if start is not None:
        count = limit
        max_range = start + (rangelen * limit) - 1
        entries = util.redis.zrange(key, start, max_range)

        while len(prefix_matchs) <= count:
            start += rangelen
            if not entries:
                break

            for entry in entries:
                minlen = min(len(entry), len(prefix))

                # First entry that diverges from the prefix ends the scan.
                if entry[0:minlen] != prefix[0:minlen]:
                    count = len(prefix_matchs)
                    break

                # A trailing "*" marks a complete word.
                if entry[-1] == "*" and len(prefix_matchs) != count:
                    match = entry[:-1]
                    if match not in prefix_matchs:
                        prefix_matchs.append(match)

            # NOTE(review): slices the fetched list by absolute rank
            # instead of requesting another page; left unchanged to
            # preserve behavior -- confirm intent.
            entries = entries[start:max_range]

    # Set keys of the completed words.
    words = [mk_sets_key(name, word) for word in prefix_matchs]

    # Condition keys stay out of ``words``: words are unioned, whereas
    # conditions have to be intersected with that union.
    condition_keys = [mk_condition_key(name, c, utf8(conditions[c]))
                      for c in conditions]

    if not words:
        logging.info("no words")
        if not condition_keys:
            # Neither words nor conditions: nothing can match.
            return []

    # Search by words.
    temp_store_key = None
    if len(words) > 1:
        temp_store_key = "tmpsunionstore:%s" % "+".join(words)
        if not util.redis.exists(temp_store_key):
            # Keep the union of the word sets in a temporary key ...
            util.redis.sunionstore(temp_store_key, words)
            # ... that is dropped automatically after one day.
            util.redis.expire(temp_store_key, 86400)
    elif words:
        temp_store_key = words[0]

    # If there are conditions, combine them with the word result.
    if condition_keys:
        if words:
            # Must be append(): ``+=`` with a string would extend the
            # list with single characters.
            condition_keys.append(temp_store_key)

        temp_store_key = "tmpsinterstore:%s" % "+".join(condition_keys)
        if not util.redis.exists(temp_store_key):
            util.redis.sinterstore(temp_store_key, condition_keys)
            util.redis.expire(temp_store_key, 86400)

    ids = util.redis.sort(temp_store_key,
                          start=0,
                          num=limit,
                          by=mk_score_key(name, "*"),
                          desc=True)
    if not ids:
        return []

    return util.hmget(name, ids)