def cut_topk(self, sentence, topk):
    """Yield one word list per candidate path through the sentence graph.

    Args:
        sentence: Text to segment. Anything that is not already a text
            string is decoded as UTF-8; if that fails, it is decoded as GBK
            with undecodable bytes ignored.
        topk: Passed straight through as the last argument of ``ksp_yen``
            (presumably the number of paths requested -- confirm against
            ``ksp_yen``).

    Yields:
        list: For each item returned by ``ksp_yen``, the slices of
        ``sentence`` delimited by consecutive entries of ``item['path']``.
    """
    # ``type(u'')`` is ``unicode`` on Python 2 and ``str`` on Python 3, so the
    # check behaves exactly as before on Python 2 but no longer raises
    # NameError on Python 3.
    text_type = type(u'')
    if not isinstance(sentence, text_type):
        try:
            sentence = sentence.decode('utf-8')
        except UnicodeDecodeError:
            sentence = sentence.decode('gbk', 'ignore')

    graph = self.get_graph(sentence)
    for item in ksp_yen(graph, 0, graph.N - 1, topk):
        path = item['path']
        # Consecutive path entries are used as slice boundaries of the
        # sentence. ``range`` exists on both Python 2 and 3; ``xrange`` does not.
        yield [sentence[path[i - 1]:path[i]] for i in range(1, len(path))]
def cut_topk(self, sentence, topk):
    """Yield one word list per candidate path through the sentence graph.

    Args:
        sentence: Text to segment. Anything that is not already a text
            string is decoded as UTF-8; if that fails, it is decoded as GBK
            with undecodable bytes ignored.
        topk: Passed straight through as the last argument of ``ksp_yen``
            (presumably the number of paths requested -- confirm against
            ``ksp_yen``).

    Yields:
        list: For each item returned by ``ksp_yen``, the slices of
        ``sentence`` delimited by consecutive entries of ``item['path']``.
    """
    # ``type(u'')`` is ``unicode`` on Python 2 and ``str`` on Python 3, so the
    # check behaves exactly as before on Python 2 but no longer raises
    # NameError on Python 3.
    text_type = type(u'')
    if not isinstance(sentence, text_type):
        try:
            sentence = sentence.decode('utf-8')
        except UnicodeDecodeError:
            sentence = sentence.decode('gbk', 'ignore')

    graph = self.get_graph(sentence)
    for item in ksp_yen(graph, 0, graph.N - 1, topk):
        path = item['path']
        # Consecutive path entries are used as slice boundaries of the
        # sentence. ``range`` exists on both Python 2 and 3; ``xrange`` does not.
        yield [sentence[path[i - 1]:path[i]] for i in range(1, len(path))]
def __cut_graph_topk(self, sentence, graph):
    """Return the candidate that ``self.choise_best`` selects for ``sentence``.

    Candidates come from ``ksp_yen`` run over ``graph`` from node ``0`` to
    node ``graph.N - 1``, with ``self.topk`` as its last argument.
    """
    candidates = ksp_yen(
        graph,
        0,
        graph.N - 1,
        self.topk,
    )
    return self.choise_best(sentence, graph, candidates)
def __cut_graph_topk(self, sentence, graph):
    """Pick a result for ``sentence`` from the paths ``ksp_yen`` finds.

    ``ksp_yen`` is called on ``graph`` between node ``0`` and node
    ``graph.N - 1`` with ``self.topk``; its result is handed, together with
    ``sentence`` and ``graph``, to ``self.choise_best``, whose return value
    is returned unchanged.
    """
    first_node, last_node = 0, graph.N - 1
    found_paths = ksp_yen(graph, first_node, last_node, self.topk)
    best = self.choise_best(sentence, graph, found_paths)
    return best