def dict_mining():
    """Dependency-parse two sample sentences and print the extracted nouns.

    The sentences are sent to the BosonNLP ``depparser`` endpoint; the parse
    result is passed to ``extract_noun`` and its return value is printed.
    """
    client = BosonNLP('6cfIzKI1.27567.fLaZOvRXwl8f')
    samples = [
        '整流级逆变级滤波器负载三相检测abcdq双SVM控制dqabcADRCADRCaubucu*du*quotωotωinvTrecTuqud图3基于ADRC的TSMC闭环控制系统框图Fig.3Closed-loopcontroldiagramofTSMCbasedonADRCADRC采用图1结构。',
        '但励磁绕组时间常数较大,闭环控制系统的截止频率较低,影响发电机输出端电压的响应速度。',
    ]
    parsed = client.depparser(samples)
    print(extract_noun(parsed))
class CNSegment:
    """Wrapper around a word-segmentation tool.

    Uses the API provided by bosonnlp.
    """

    # Stop-word list consulted by denoisingOne when useStopWord is true.
    # Kept as a class attribute so existing code that fills
    # ``CNSegment.stopwords`` keeps working.
    stopwords = []

    def __init__(self):
        self.nlp = BosonNLP(bosonkey)

    def get_tags(self, sentences):
        """Segment and tag the given text.

        :param sentences: a sentence or a list of sentences to segment
        :return: whatever ``self.nlp.tag`` returns for the input
        """
        return self.nlp.tag(sentences)

    def denoisingOne(self, tagdict, uTag=None, useStopWord=False):
        """Remove noise from one tagged sentence by POS tag and stop words.

        :param tagdict: mapping with parallel sequences under the keys
            ``'word'`` and ``'tag'``
        :param uTag: collection of tag initials to drop; ``None`` selects
            the default ``('w', 'o', 'y', 'u')``. An explicitly empty
            collection disables tag filtering.
        :param useStopWord: when true, also drop words found in
            ``self.stopwords``
        :return: list of the words that were kept, in their original order
        """
        # Compare against None so an explicit empty collection is honoured
        # rather than silently replaced by the default.
        useless_tags = ('w', 'o', 'y', 'u') if uTag is None else uTag

        word_list = []
        for word, tag in zip(tagdict['word'], tagdict['tag']):
            # Only the first character of the tag is compared.
            if tag[0] in useless_tags:
                continue
            if useStopWord and word in self.stopwords:
                continue
            word_list.append(word)
        return word_list

    def cut(self, sentences):
        """Segment text and strip noise from every sentence.

        :param sentences: the corpus to segment
        :return: one list of kept words per tagged sentence
        """
        return [self.denoisingOne(tagged) for tagged in self.get_tags(sentences)]

    def depenPars(self, sentences):
        """Return the result of ``self.nlp.depparser`` for ``sentences``."""
        return self.nlp.depparser(sentences)
def get_BosDepT(self, text):
    """Return the dependency parse of ``text`` from a fresh BosonNLP client.

    A new client is built from ``bosonkey`` on every call.
    """
    return BosonNLP(bosonkey).depparser(text)
class BosonNlpp:
    """Convenience wrapper around a ``BosonNLP`` client.

    Each method forwards its argument to a single client call and returns
    the result (or a value derived directly from it).
    """

    # Category index -> label, used by textClassify.
    _CATEGORIES = {
        0: '体育',
        1: '教育',
        2: '财经',
        3: '社会',
        4: '娱乐',
        5: '军事',
        6: '国内',
        7: '科技',
        8: '互联网',
        9: '房产',
        10: '国际',
        11: '女人',
        12: '汽车',
        13: '游戏'
    }

    def __init__(self, api_token='IKBIoANy.14545.A7GCYBnT9jIB'):
        """Create the underlying client.

        :param api_token: token passed to ``BosonNLP``; defaults to the
            token that was previously hard-coded here.
        """
        # NOTE(review): the default token is a credential committed to
        # source; consider supplying it from configuration instead.
        self.bonlp = BosonNLP(api_token)

    # Sentiment analysis
    def testSentiment(self, s):
        """Return the result of ``sentiment`` for ``s``."""
        return self.bonlp.sentiment(s)

    # Named entity recognition
    def lexicalAnalysis(self, s):
        """Return the first element of the ``ner`` result for ``s``."""
        return self.bonlp.ner(s)[0]

    # Dependency parsing
    def textDependency(self, s):
        """Return the result of ``depparser`` for ``s``."""
        return self.bonlp.depparser(s)

    # Keyword extraction
    def testKeywords(self, s):
        """Return the result of ``extract_keywords`` for ``s`` with ``top_k=10``."""
        return self.bonlp.extract_keywords(s, top_k=10)

    # News classification
    def textClassify(self, s):
        """Return the category label for the first ``classify`` result.

        :raises KeyError: if the first result is not a known category index
        """
        resultlist = self.bonlp.classify(s)
        return self._CATEGORIES[resultlist[0]]

    # Semantic suggestion
    def lexicalSynonym(self, term):
        """Return the result of ``suggest`` for ``term`` with ``top_k=10``."""
        return self.bonlp.suggest(term, top_k=10)

    # Word segmentation and part-of-speech tagging
    def fenci(self, s):
        """Return the result of ``tag`` for ``s``."""
        return self.bonlp.tag(s)

    def newssubstract(self, s):
        """Return the result of ``summary`` for the text ``s``.

        :param s: the text, either already decoded or as UTF-8 bytes
        """
        # Decode only byte input: text strings have no ``decode`` on
        # Python 3, and on Python 2 decoding a unicode string containing
        # non-ASCII characters raises UnicodeEncodeError.
        if isinstance(s, bytes):
            s = s.decode('utf-8')
        # The first argument is passed as an empty string
        # (presumably the title - confirm against the SDK).
        return self.bonlp.summary('', s)