def save_prefix_index(self):
    """Build the autocomplete (prefix) index for this item's title.

    Adds the item's id to the per-word set keyed by the raw title, and
    pushes every prefix of the lowercased title — plus the "word*"
    terminator entry that ``complete`` uses to recognize a full word —
    into the completion sorted set.  When pinyin matching is enabled the
    pinyin transliteration of the title is indexed the same way.
    All writes go through one pipeline and are flushed in a single
    round trip by ``pipe.execute()``.
    """
    words = [self.title.lower()]
    pipe = util.redis.pipeline()
    pipe.sadd(mk_sets_key(self.name, self.title), self.id)
    if util.pinyin_match:
        # NOTE(review): assumes Pinyin.t returns a single transliterated
        # string; if it returns a list, `words += pinyin` is correct as
        # written, otherwise it splits the string into characters —
        # confirm against the Pinyin helper.
        pinyin = Pinyin.t(self.title.lower(), "")
        words += pinyin
        pipe.sadd(mk_sets_key(self.name, pinyin), self.id)
    key = mk_complete_key(self.name)
    for word in words:
        # Index every non-empty prefix INCLUDING the full word.
        # The previous `range(0, len(word))` inserted a useless empty
        # prefix and skipped the complete word, so an exact-title
        # keyword made `complete`'s zrank lookup return None and the
        # search came back empty.
        for i in range(1, len(word) + 1):
            pipe.zadd(key, word[0:i], 0)
        # Terminator entry: trailing "*" marks a complete indexed word.
        pipe.zadd(key, word + "*", 0)
    # commit
    pipe.execute()
def complete(name, keyword, limit=10, conditions=None):
    """Prefix-match search.

    Scans the completion sorted set for entries sharing the ``keyword``
    prefix, collects terminated ("word*") matches, unions the per-word id
    sets, optionally intersects with condition sets, and returns the
    stored hashes sorted by score (descending).

    name: index name used to build redis keys
    keyword: prefix to complete
    limit: max match count
    conditions: optional dict of field -> value filters
    Returns a list of stored items (possibly empty).
    """
    conditions = conditions if isinstance(conditions, dict) and conditions else {}
    if not keyword and not conditions:
        logging.debug("no word and conditions")
        return []
    keyword = utf8(keyword.strip())
    prefix_matchs = []
    # This is not random, try to get replies < MTU size
    rangelen = util.complete_max_length
    prefix = keyword.lower()
    key = mk_complete_key(name)
    start = util.redis.zrank(key, prefix)
    # zrank returns the member's rank (0 for the first member) or None
    # when absent.  `if start:` treated rank 0 as "not found" and
    # silently dropped completions for the lexicographically first
    # prefix — test explicitly against None.
    if start is not None:
        count = limit
        max_range = start + (rangelen * limit) - 1
        entries = util.redis.zrange(key, start, max_range)
        while len(prefix_matchs) <= count:
            start += rangelen
            if not entries:
                break
            # Entries are sorted, so the first entry that stops sharing
            # the prefix ends the useful portion of the scan.
            for entry in entries:
                minlen = min(len(entry), len(prefix))
                # This entry breaks consistency with the prefix.
                if entry[0:minlen] != prefix[0:minlen]:
                    count = len(prefix_matchs)
                    break
                # Found a terminated entry: trailing "*" marks a full word.
                if entry[-1] == "*" and len(prefix_matchs) != count:
                    match = entry[:-1]
                    if match not in prefix_matchs:
                        prefix_matchs.append(match)
            entries = entries[start:max_range]
    # Build the special set-key name for each matched word.
    words = [mk_sets_key(name, word) for word in prefix_matchs]
    # Unlike `query`, condition keys are NOT merged into `words` here:
    # words are combined with a union, while conditions must be
    # intersected against that union.
    condition_keys = [mk_condition_key(name, c, utf8(conditions[c]))
                      for c in conditions]
    # Union the matched-word sets into a temporary key.
    temp_store_key = "tmpsunionstore:%s" % "+".join(words)
    if len(words) == 0:
        logging.info("no words")
    elif len(words) > 1:
        if not util.redis.exists(temp_store_key):
            # Union all matched-word sets into the temp area and let
            # the cached result expire after one day.
            util.redis.sunionstore(temp_store_key, words)
            util.redis.expire(temp_store_key, 86400)
    else:
        temp_store_key = words[0]
    # If there are conditions, intersect them with the word results.
    if condition_keys:
        # The word-union key joins the intersection exactly when words
        # were found.  The old code had the guard inverted
        # (`if not words`) AND used `+=` on a string, which appended the
        # key's individual characters — so condition searches ignored
        # the keyword match entirely.
        if words:
            condition_keys.append(temp_store_key)
        temp_store_key = "tmpsinterstore:%s" % "+".join(condition_keys)
        if not util.redis.exists(temp_store_key):
            util.redis.sinterstore(temp_store_key, condition_keys)
            util.redis.expire(temp_store_key, 86400)
    # Pull out up to `limit` ids, highest score first.
    ids = util.redis.sort(temp_store_key, start=0, num=limit,
                          by=mk_score_key(name, "*"), desc=True)
    if not ids:
        return []
    return hmget(name, ids)
def complete(name, keyword, limit=10, conditions=None):
    """Prefix-match search: return up to ``limit`` stored items whose
    indexed words start with ``keyword``, optionally filtered by
    ``conditions`` (dict of field -> value), sorted by score descending.
    """
    conditions = conditions if isinstance(conditions, dict) and conditions else {}
    if not keyword and not conditions:
        logging.debug("no word and conditions")
        return []
    keyword = utf8(keyword.strip())
    prefix_matchs = []
    # This is not random, try to get replies < MTU size
    rangelen = util.complete_max_length
    prefix = keyword.lower()
    key = mk_complete_key(name)
    start = util.redis.zrank(key, prefix)
    # Rank 0 (first member of the sorted set) is a valid hit; the old
    # truthiness test `if start:` discarded it.  None means "absent".
    if start is not None:
        count = limit
        max_range = start + (rangelen * limit) - 1
        entries = util.redis.zrange(key, start, max_range)
        while len(prefix_matchs) <= count:
            start += rangelen
            if not entries:
                break
            for entry in entries:
                minlen = min(len(entry), len(prefix))
                # Sorted order: once an entry stops sharing the prefix,
                # nothing later can match.
                if entry[0:minlen] != prefix[0:minlen]:
                    count = len(prefix_matchs)
                    break
                # Entries ending in "*" are complete indexed words.
                if entry[-1] == "*" and len(prefix_matchs) != count:
                    match = entry[:-1]
                    if match not in prefix_matchs:
                        prefix_matchs.append(match)
            entries = entries[start:max_range]
    # Special set-key names for the matched words.
    words = []
    for word in prefix_matchs:
        words.append(mk_sets_key(name, word))
    # Unlike `query`, condition keys are kept separate from `words`:
    # words are unioned, and conditions are intersected with that union.
    condition_keys = []
    if conditions:
        for c in conditions:
            condition_keys.append(mk_condition_key(name, c, utf8(conditions[c])))
    # Union the matched-word sets into a temporary key.
    temp_store_key = "tmpsunionstore:%s" % "+".join(words)
    if len(words) == 0:
        logging.info("no words")
    elif len(words) > 1:
        if not util.redis.exists(temp_store_key):
            # Store the union of all word sets, cached for one day.
            util.redis.sunionstore(temp_store_key, words)
            util.redis.expire(temp_store_key, 86400)
    else:
        temp_store_key = words[0]
    # If there are conditions, combine them with the word results here.
    if condition_keys:
        # Fixes two defects: the guard was inverted (`if not words`),
        # and `+=` on a string extended the list with single characters.
        # The union key belongs in the intersection only when words
        # were actually matched.
        if words:
            condition_keys.append(temp_store_key)
        temp_store_key = "tmpsinterstore:%s" % "+".join(condition_keys)
        if not util.redis.exists(temp_store_key):
            util.redis.sinterstore(temp_store_key, condition_keys)
            util.redis.expire(temp_store_key, 86400)
    # Fetch up to `limit` ids ordered by score, highest first.
    ids = util.redis.sort(temp_store_key, start=0, num=limit,
                          by=mk_score_key(name, "*"), desc=True)
    if not ids:
        return []
    return util.hmget(name, ids)