Example #1
0
 def _gen_suffix(self,phrase):
     """Yield ``(key, suffix, score)`` triples for the growing prefixes of ``phrase``.

     The first prefix is the leading ``SUFFIX_MIN_LENGTH`` characters. For each
     following character the generator yields the ``chinese_key`` of the
     current prefix, that character, and the character's ``ord`` value, then
     extends the prefix by the character.

     The last triple always pairs the ``chinese_key`` of the complete phrase
     with ``self.terminator`` and a score of ``0``; it is the only triple when
     the phrase has no characters past ``SUFFIX_MIN_LENGTH``.
     """
     head, tail = phrase[:SUFFIX_MIN_LENGTH], phrase[SUFFIX_MIN_LENGTH:]
     for ch in tail:
         yield (chinese_key(head), ch, ord(ch))
         head += ch
     # Closing entry: the whole phrase followed by the terminator marker.
     yield (chinese_key(phrase), self.terminator, 0)
Example #2
0
 def put(self,title,item_id):
     """
     Index ``title`` under ``item_id``.

     The title is UTF-8 encoded and segmented with ``mmseg.seg_txt``. Every
     non-empty segment is decoded back and:

     * registered for ``item_id`` through ``_add_phrase`` (keyed by
       ``chinese_key`` of the segment);
     * expanded by ``_gen_suffix`` into (key, suffix, score) triples that are
       stored through ``_add_suffix`` (the suffix is passed through
       ``chinese_key`` first).

     When ``self.pinyin`` is set, the segment is also translated with
     ``self.pinyin.translate``; each sub-phrase from ``_gen_pinyin_phrase`` is
     registered the same way, with suffixes generated from the sub-phrase
     stripped of whitespace and stored without ``chinese_key``.

     Nothing happens when ``title`` or ``item_id`` is falsy.
     NOTE(review): the earlier docstring described the helpers as sadd/zadd
     operations -- confirm against ``_add_phrase`` / ``_add_suffix``.
     """
     if not (title and item_id):
         return

     for raw_segment in mmseg.seg_txt(title.encode('utf8')):
         if not raw_segment:
             continue

         word = raw_segment.decode('utf8')
         self._add_phrase(chinese_key(word), item_id)
         for entry in self._gen_suffix(word):
             key, suffix, score = entry
             self._add_suffix(key, chinese_key(suffix), score)

         # The pinyin index is optional; it is skipped without a translator
         # or when the translation comes back empty.
         if self.pinyin:
             romanized = self.pinyin.translate(word)
             if romanized:
                 for sub_phrase in self._gen_pinyin_phrase(romanized):
                     self._add_phrase(sub_phrase, item_id)
                     compact = re.sub('\\s+', '', sub_phrase)
                     for key, suffix, score in self._gen_suffix(compact):
                         self._add_suffix(key, suffix, score)
Example #3
0
    def remove(self,title,item_id):
        """Detach ``item_id`` from every phrase derived from ``title``.

        The title is UTF-8 encoded and segmented with ``mmseg.seg_txt``; each
        non-empty segment is decoded and passed, via ``chinese_key``, to
        ``_rem_phrase``. When ``self.pinyin`` is set and yields a non-empty
        translation for the segment, every sub-phrase produced by
        ``_gen_pinyin_phrase`` is passed to ``_rem_phrase`` as well.

        Only ``_rem_phrase`` is called here; no suffix entries are touched.
        Nothing happens when ``title`` or ``item_id`` is falsy.
        """
        if not (title and item_id):
            return

        for raw_segment in mmseg.seg_txt(title.encode('utf8')):
            if not raw_segment:
                continue

            word = raw_segment.decode('utf8')
            self._rem_phrase(chinese_key(word), item_id)

            # Pinyin phrases are removed only when a translator is configured
            # and it produced something for this segment.
            if self.pinyin:
                romanized = self.pinyin.translate(word)
                if romanized:
                    for sub_phrase in self._gen_pinyin_phrase(romanized):
                        self._rem_phrase(sub_phrase, item_id)