class Preprocessor(object):
    """Run the initial NLP passes over a statement via ``LtpUtil``."""

    def __init__(self, configs):
        """Create the ``LtpUtil`` helper from ``configs``."""
        self.ltp_util = LtpUtil(configs)

    def process(self, stmt):
        """Perform the initial processing of a statement.

        Segments and POS-tags ``stmt.text``, runs dependency parsing on the
        result, computes the emotion value, and stores everything on ``stmt``
        through its ``set_words`` / ``set_pos`` / ``set_arcs`` /
        ``set_emotion`` setters.

        Returns:
            The same ``stmt`` object that was passed in.
        """
        # Word segmentation and part-of-speech tagging.
        words, tags = self.cut(stmt.text)
        dependencies = self.dependency_parse(words, tags)

        # Words and tags are stored as plain lists.
        stmt.set_words(list(words))
        stmt.set_pos(list(tags))
        stmt.set_arcs(dependencies)

        emotion = self.emotion_analysis(stmt.text)
        stmt.set_emotion(emotion)
        return stmt

    def cut(self, text, HMM=True):
        """Segment ``text`` and POS-tag the resulting words.

        ``HMM`` is accepted but not used by this implementation.

        Returns:
            A ``(seg, pos)`` tuple: the segmentor output and the tagger
            output for it.
        """
        words = self.ltp_util.Segmentor(text)
        return words, self.ltp_util.Postagger(words)

    def dependency_parse(self, seg, pos):
        """Run dependency parsing on segmented words and their POS tags.

        Returns:
            A list with one ``(head, relation)`` tuple per arc produced by
            the parser.
        """
        # Delegates to the LTP parser (pyltp) for dependency parsing.
        return [
            (arc.head, arc.relation)
            for arc in self.ltp_util.Parser(seg, pos)
        ]

    def emotion_analysis(self, text):
        """Return the emotion analysis result; currently always ``None``."""
        return None
class LtpTreeBuilder(object):
    """Build an ``LtpTree`` from the dependency parse of a sentence."""

    def __init__(self, configs):
        """Create the ``LtpUtil`` helper from ``configs``."""
        self.ltp_util = LtpUtil(configs)

    def build(self, sentence):
        """Segment, tag and parse ``sentence`` and return it as a tree.

        The root node is the word whose arc has ``head == 0``; it is created
        with the relation ``'HED'`` and head ``0``. Every other word becomes
        a direct child of that root node, carrying its own arc relation and
        head index. Word indices are 1-based.

        Args:
            sentence: The text passed to the segmentor.

        Returns:
            The root ``LtpTree`` with all other words added as children.

        Raises:
            ValueError: If the parse contains no arc with ``head == 0``
                (for example, when the sentence yields no words), since no
                root node can be chosen.
        """
        words = self.ltp_util.Segmentor(sentence)
        postags = self.ltp_util.Postagger(words)
        arcs = self.ltp_util.Parser(words, postags)

        # Locate the root word. If several arcs have head == 0 the last one
        # wins, matching the original scan which never broke out early.
        head_index = -1
        for position, arc in enumerate(arcs, 1):
            if arc.head == 0:
                head_index = position

        # Without this guard the -1 sentinel would be used as a real index
        # (postags[-2]), picking an unrelated word or raising IndexError.
        if head_index == -1:
            raise ValueError(
                "no root arc (head == 0) found in parse of sentence: %r"
                % (sentence,)
            )

        tree = LtpTree(
            head_index,
            'HED',
            0,
            postags[head_index - 1],
            words[head_index - 1],
        )
        for position, arc in enumerate(arcs, 1):
            if position == head_index:
                continue
            child = LtpTree(
                position,
                arc.relation,
                arc.head,
                postags[position - 1],
                words[position - 1],
            )
            tree.addChild(child)
        return tree
def __init__(self, configs):
    """Create the ``LtpUtil`` helper from ``configs`` and keep it on the instance."""
    self.ltp_util = LtpUtil(configs)