Esempio n. 1
0
class Preprocessor(object):
    """Fills a statement object with segmentation, tagging and parse results.

    All linguistic work is delegated to an ``LtpUtil`` instance created from
    the configuration passed to the constructor.
    """

    def __init__(self, configs):
        """Create the ``LtpUtil`` helper from ``configs``."""
        self.ltp_util = LtpUtil(configs)

    def process(self, stmt):
        """Run the initial processing of a sentence.

        Covers word segmentation, part-of-speech tagging, dependency parsing
        and emotion analysis. The results are stored on ``stmt`` through its
        ``set_words``, ``set_pos``, ``set_arcs`` and ``set_emotion`` setters.

        Args:
            stmt: Object exposing a ``text`` attribute and the four setters
                listed above.

        Returns:
            The same ``stmt`` object, after it has been populated.
        """
        # Segmentation and POS tagging first; the parse depends on both.
        words, tags = self.cut(stmt.text)
        dependencies = self.dependency_parse(words, tags)

        # Hand plain lists to the statement rather than the raw results.
        stmt.set_words(list(words))
        stmt.set_pos(list(tags))
        stmt.set_arcs(dependencies)

        emotion = self.emotion_analysis(stmt.text)
        stmt.set_emotion(emotion)

        return stmt

    def cut(self, text, HMM=True):
        """Segment ``text`` into words and tag each word.

        Args:
            text: The raw sentence to segment.
            HMM: Accepted for interface compatibility; this method does not
                use it.

        Returns:
            A ``(seg, pos)`` pair: the segmentation result and the tagging
            result computed from it.
        """
        segmented = self.ltp_util.Segmentor(text)
        return segmented, self.ltp_util.Postagger(segmented)

    def dependency_parse(self, seg, pos):
        """Run dependency parsing (via pyltp) over a segmented sentence.

        Args:
            seg: Segmentation result, as returned by ``cut``.
            pos: Tagging result, as returned by ``cut``.

        Returns:
            A list with one ``(head, relation)`` tuple per parsed arc.
        """
        return [
            (arc.head, arc.relation)
            for arc in self.ltp_util.Parser(seg, pos)
        ]

    def emotion_analysis(self, text):
        """Return the emotion analysis result for ``text``.

        Placeholder: no analysis is performed and ``None`` is always returned.
        """
        return None
Esempio n. 2
0
class LtpTreeBuilder(object):
    """Builds an ``LtpTree`` for a sentence from ``LtpUtil`` analysis results."""

    def __init__(self, configs):
        """Create the ``LtpUtil`` helper from ``configs``."""
        self.ltp_util = LtpUtil(configs)

    def build(self, sentence):
        """Segment, tag and parse ``sentence`` and assemble its tree.

        The arc whose ``head`` is 0 becomes the root node (relation
        ``'HED'``, parent 0). Every other arc becomes a node that is handed
        to the root through ``addChild``. Node indices are 1-based.

        Args:
            sentence: The raw sentence text.

        Returns:
            The root ``LtpTree`` with all other nodes added to it.

        Raises:
            ValueError: If the parse contains no arc with ``head == 0``
                (for example when the parse result is empty), since no root
                node can be built in that case.
        """
        words = self.ltp_util.Segmentor(sentence)
        postags = self.ltp_util.Postagger(words)
        arcs = self.ltp_util.Parser(words, postags)

        # Locate the root. If several arcs have head == 0 the last one wins,
        # matching the behaviour of the original scan.
        head_index = None
        for position, arc in enumerate(arcs, start=1):
            if arc.head == 0:
                head_index = position

        # Without a root, indexing with a sentinel would silently pick the
        # wrong token (or fail with an opaque IndexError); fail clearly.
        if head_index is None:
            raise ValueError(
                "no root arc (head == 0) found for sentence: %r" % (sentence,))

        tree = LtpTree(head_index, 'HED', 0, postags[head_index - 1],
                       words[head_index - 1])

        for position, arc in enumerate(arcs, start=1):
            if position == head_index:
                continue
            p_tree = LtpTree(position, arc.relation, arc.head,
                             postags[position - 1], words[position - 1])
            tree.addChild(p_tree)

        return tree
Esempio n. 3
0
 def __init__(self, configs):
     """Create the ``LtpUtil`` helper and store it on ``self.ltp_util``.

     Args:
         configs: Passed unchanged to the ``LtpUtil`` constructor.
     """
     self.ltp_util = LtpUtil(configs)