def splitTerms(self, text, categories=None):
    """Split ``text`` into a flat list of terms.

    The text is cut into sentences with ``lexicon.splitSentence`` and every
    non-empty sentence is walked with ``lexicon.iterMixTerms``.  A piece
    whose string starts with ``'E'`` is kept unchanged as an English term;
    any other piece is treated as Chinese text and handed to
    ``self.db.splitTerms`` together with ``categories``.

    :param text: the text to split.
    :param categories: forwarded untouched to ``self.db.splitTerms``
        (defaults to ``None``).
    :return: list of terms, in the order they were produced.
    """
    collected = []
    for part in lexicon.splitSentence(text):
        # Empty sentences contribute nothing.
        if not part:
            continue
        for piece in lexicon.iterMixTerms(part):
            if not piece.startswith('E'):
                # Chinese sentence: let the database-backed splitter cut it.
                collected.extend(self.db.splitTerms(piece, categories))
            else:
                # English term: keep it exactly as yielded.
                collected.append(piece)
    return collected
def splitNgramTerms(self, text):
    """Split ``text`` into English terms plus n-gram terms for Chinese text.

    The text is cut into sentences with ``lexicon.splitSentence`` and every
    non-empty sentence is walked with ``lexicon.iterMixTerms``.  A piece
    whose string starts with ``'E'`` is kept unchanged as an English term.
    Any other piece is treated as Chinese text: for each ``n`` from 1 up to
    and including ``self.ngram``, the terms yielded by
    ``lexicon.iterTerms(n, piece, False)`` are appended.

    :param text: the text to split.
    :return: list of terms, in the order they were produced.
    """
    terms = []
    for sentence in lexicon.splitSentence(text):
        # Empty sentences contribute nothing.
        if not sentence:
            continue
        for mixed in lexicon.iterMixTerms(sentence):
            # English term: keep it exactly as yielded.
            if mixed.startswith('E'):
                terms.append(mixed)
                continue
            # Chinese sentence: collect terms for every size 1..self.ngram.
            # ``range`` is used instead of ``xrange`` so the method runs on
            # both Python 2 and Python 3; the values iterated are the same.
            # NOTE(review): the meaning of the trailing ``False`` argument
            # is defined by ``lexicon.iterTerms`` -- confirm there.
            for n in range(1, self.ngram + 1):
                terms.extend(lexicon.iterTerms(n, mixed, False))
    return terms
def splitMixTerms(self, text):
    """Return everything ``lexicon.iterMixTerms`` yields for ``text`` as a list.

    :param text: the text to split.
    :return: list holding the yielded items in iteration order.
    """
    mixed_terms = lexicon.iterMixTerms(text)
    return list(mixed_terms)