Ejemplo n.º 1
0
    def cut_topk(self, sentence, topk):
        """Yield candidate segmentations of ``sentence`` as lists of words.

        Non-text input is decoded first: UTF-8 is tried, and if that raises
        ``UnicodeDecodeError`` GBK is used with undecodable bytes ignored.
        A graph is then built with ``self.get_graph`` and ``ksp_yen`` is
        asked for ``topk`` paths from node 0 to node ``graph.N - 1``
        (presumably the k shortest paths via Yen's algorithm, going by the
        helper's name -- confirm against its definition).

        Each result's ``'path'`` entry is used as a sequence of offsets into
        ``sentence``; every pair of consecutive offsets delimits one word.

        This is a generator, so nothing runs until it is iterated.
        """
        # ``type(u'')`` is ``unicode`` on Python 2 and ``str`` on Python 3, so
        # the check no longer depends on the Python 2-only ``unicode`` builtin.
        text_type = type(u'')
        if not isinstance(sentence, text_type):
            try:
                sentence = sentence.decode('utf-8')
            except UnicodeDecodeError:
                sentence = sentence.decode('gbk', 'ignore')

        graph = self.get_graph(sentence)

        for item in ksp_yen(graph, 0, graph.N - 1, topk):
            path = item['path']
            # ``range`` replaces the Python 2-only ``xrange``; paths are short
            # enough that the list built on Python 2 is of no consequence.
            yield [sentence[path[i - 1]:path[i]] for i in range(1, len(path))]
Ejemplo n.º 2
0
    def cut_topk(self, sentence, topk):
        """Yield candidate segmentations of ``sentence`` as lists of words.

        Non-text input is decoded first: UTF-8 is tried, and if that raises
        ``UnicodeDecodeError`` GBK is used with undecodable bytes ignored.
        A graph is then built with ``self.get_graph`` and ``ksp_yen`` is
        asked for ``topk`` paths from node 0 to node ``graph.N - 1``
        (presumably the k shortest paths via Yen's algorithm, going by the
        helper's name -- confirm against its definition).

        Each result's ``'path'`` entry is used as a sequence of offsets into
        ``sentence``; every pair of consecutive offsets delimits one word.

        This is a generator, so nothing runs until it is iterated.
        """
        # ``type(u'')`` is ``unicode`` on Python 2 and ``str`` on Python 3, so
        # the check no longer depends on the Python 2-only ``unicode`` builtin.
        text_type = type(u'')
        if not isinstance(sentence, text_type):
            try:
                sentence = sentence.decode('utf-8')
            except UnicodeDecodeError:
                sentence = sentence.decode('gbk', 'ignore')

        graph = self.get_graph(sentence)

        for item in ksp_yen(graph, 0, graph.N - 1, topk):
            path = item['path']
            # ``range`` replaces the Python 2-only ``xrange``; paths are short
            # enough that the list built on Python 2 is of no consequence.
            yield [sentence[path[i - 1]:path[i]] for i in range(1, len(path))]
Ejemplo n.º 3
0
 def __cut_graph_topk(self, sentence, graph):
     """Pick a segmentation of ``sentence`` from candidate paths in ``graph``.

     Requests ``self.topk`` paths from node 0 to node ``graph.N - 1`` via
     ``ksp_yen`` and returns whatever ``self.choise_best`` selects from them.
     """
     last_node = graph.N - 1
     candidates = ksp_yen(graph, 0, last_node, self.topk)
     return self.choise_best(sentence, graph, candidates)
Ejemplo n.º 4
0
 def __cut_graph_topk(self, sentence, graph):
     """Return the result of ``self.choise_best`` over paths through ``graph``.

     The candidate paths come from ``ksp_yen``, called with start node 0,
     end node ``graph.N - 1`` and ``self.topk`` as the requested count.
     """
     start, end = 0, graph.N - 1
     return self.choise_best(
         sentence,
         graph,
         ksp_yen(graph, start, end, self.topk),
     )