def classify(contents):
    """Classify text with BosonNLP and translate the results to topic ids.

    A new BosonNLP client is created from the module-level ``boson_token`` on
    every call. Each entry returned by the client's ``classify`` call is
    looked up in the module-level ``topic_to_id`` mapping.

    Parameters
    ----------
    contents: value passed unchanged to ``BosonNLP.classify``

    Returns
    -------
    list holding one ``topic_to_id`` value per entry of the API result, in
    the order the API returned them
    """
    client = BosonNLP(boson_token)
    return [topic_to_id[label] for label in client.classify(contents)]
def Class_ification(sentence):
    """Classify *sentence* with BosonNLP and draw the category as a graph node.

    The first category id returned by ``BosonNLP.classify`` is mapped to its
    Chinese label, added as the only node of a directed graph, and displayed
    through ``plot.show()``.

    Parameters
    ----------
    sentence: text passed unchanged to ``BosonNLP.classify``

    Returns
    -------
    None; the function's only effect is the displayed figure.

    Raises
    ------
    KeyError: if the API returns a category id outside 0-13
    """
    # Perform text classification.
    # Presumably SimHei is selected so the Chinese title and label render;
    # confirm the font is installed wherever this runs.
    plot.rcParams['font.sans-serif'] = ['SimHei']
    plot.rcParams['axes.unicode_minus'] = False
    # NOTE(review): the API token is hard-coded in source; consider loading it
    # from configuration instead of committing it.
    nlp = BosonNLP('TPDuivpZ.27572.rVuPCI9-kUlN')
    result = nlp.classify(sentence)
    info = {
        0: "体育",
        1: "教育",
        2: "财经",
        3: "社会",
        4: "娱乐",
        5: "军事",
        6: "国内",
        7: "科技",
        8: "互联网",
        9: "房产",
        # Id 10 is "international"; it was previously a duplicate of id 7.
        10: "国际",
        11: "女人",
        12: "汽车",
        13: "游戏",
    }
    DG = nx.DiGraph()
    plot.figure(figsize=(3, 3))
    plot.subplot(1, 1, 1)
    plot.title('文本分类', color='red', fontsize=15)
    # Only the top category (first element of the result) is displayed.
    DG.add_node(info[result[0]])
    nx.draw(DG, with_labels=True, node_size=6000, node_color='lightblue')
    plot.show()
class _BosonNLPWrapper(object):
    """
    NLP object using the BosonNLP API Python SDK.
    """

    # English label for each category id; the list index is the id that
    # ``BosonNLP.classify`` returns.
    news_categories = [
        'physical education', 'education', 'finance', 'society',
        'entertainment', 'military', 'domestic', 'science and technology',
        'the internet', 'real estate', 'international', 'women', 'car', 'game'
    ]

    def __init__(self, api_token=None):
        """
        Creates the underlying BosonNLP client.

        Parameters
        ----------
        api_token (string): BosonNLP API token (required)

        Raises
        ------
        AssertionError if api_token is None
        """
        # Raised explicitly rather than through an ``assert`` statement, which
        # is stripped when Python runs with -O. The exception type is kept so
        # callers that already catch AssertionError keep working.
        if api_token is None:
            raise AssertionError("Please provide an API token")
        self.token = api_token
        self.nlp = BosonNLP(self.token)

    def get_sentiment(self, text):
        """
        Performs sentiment analysis on a text passage.

        Parameters
        ----------
        text (string): text passage to be analyzed for sentiment

        Returns
        -------
        dictionary with 'positive' and 'negative' as keys, filled from the
        first result returned by the API
        """
        pos, neg = self.nlp.sentiment(text)[0]
        return {'positive': pos, 'negative': neg}

    def classify_news(self, text):
        """
        Classifies news text into one of the categories in news_categories.

        Parameters
        ----------
        text (string): text passage to classify

        Returns
        -------
        the news_categories entry for the first category id returned by the
        API

        Raises
        ------
        KeyError if the returned id has no entry in news_categories
        """
        # A dict lookup (not list indexing) so an unexpected id fails with
        # KeyError instead of silently wrapping around on a negative value.
        cats_dict = dict(enumerate(_BosonNLPWrapper.news_categories))
        clsfy_num = self.nlp.classify(text)[0]
        return cats_dict[clsfy_num]

    def extract_keywords(self, text, top_k=3):
        """
        Extracts the top k keywords and the weight of each word in the text.

        Parameters
        ----------
        text (string): text passage from which to extract keywords
        top_k (integer): number of keywords to return

        Returns
        -------
        list of key-value pairs {word: weight}, in the order the API returned
        them
        """
        result = self.nlp.extract_keywords(
            text, top_k)  # outputs in sorted order of weight
        # Each API entry is indexed as (weight, word); flip it to {word: weight}.
        return [{entry[1]: entry[0]} for entry in result]

    def segment_words_and_tag(self, text):
        """
        Splits up text into segments of "words" and tags them with their
        respective part of speech.

        See: http://docs.bosonnlp.com/tag.html

        Parameters
        ----------
        text (string): text passage to segment into separate "words" and tags
                       them with parts of speech

        Returns
        -------
        list of key-value pairs {word: part-of-speech-tag}
        """
        result = self.nlp.tag(text)[0]
        # 'word' and 'tag' are parallel lists; pair them up position by position.
        return [{word: tag} for word, tag in zip(result['word'], result['tag'])]

    def get_summary(self, content, title='', pct_limit=0.2):
        """
        Extracts a new digest (summary) of the content.

        See: http://docs.bosonnlp.com/summary.html

        Parameters
        ----------
        content (string): text passage to summarize
        title (string): title of the passage (optional, may provide more
                        accurate results)
        pct_limit (float): max length of the summary in terms of percentage of
                           the original word count

        Returns
        -------
        string containing the summary of the passage
        """
        # The SDK call takes the title first, then the content.
        return self.nlp.summary(title, content, pct_limit)
class BosonNlpp:
    """Thin convenience wrapper around a single BosonNLP client.

    Each method forwards its argument to one BosonNLP SDK call and returns
    the SDK result, with light post-processing where noted.
    """

    def __init__(self, api_token='IKBIoANy.14545.A7GCYBnT9jIB'):
        """Create the BosonNLP client.

        api_token: BosonNLP API token. The default keeps the previously
            hard-coded value so existing ``BosonNlpp()`` callers still work.
        """
        # NOTE(review): the default token is committed in source; prefer
        # passing one in from configuration.
        self.bonlp = BosonNLP(api_token)

    # Sentiment analysis
    def testSentiment(self, s):
        """Return the raw result of ``BosonNLP.sentiment`` for *s*."""
        return self.bonlp.sentiment(s)

    # Named entity recognition
    def lexicalAnalysis(self, s):
        """Return the first element of ``BosonNLP.ner`` for *s*."""
        return self.bonlp.ner(s)[0]

    # Dependency grammar analysis
    def textDependency(self, s):
        """Return the raw result of ``BosonNLP.depparser`` for *s*."""
        return self.bonlp.depparser(s)

    # Keyword extraction
    def testKeywords(self, s):
        """Return up to 10 keywords of *s* from ``BosonNLP.extract_keywords``."""
        return self.bonlp.extract_keywords(s, top_k=10)

    # News classification
    def textClassify(self, s):
        """Return the Chinese category label for the first classification of *s*.

        Raises KeyError if the API returns an id outside 0-13.
        """
        resultlist = self.bonlp.classify(s)
        classifys = {
            0: '体育',
            1: '教育',
            2: '财经',
            3: '社会',
            4: '娱乐',
            5: '军事',
            6: '国内',
            7: '科技',
            8: '互联网',
            9: '房产',
            10: '国际',
            11: '女人',
            12: '汽车',
            13: '游戏'
        }
        return classifys[resultlist[0]]

    # Semantic association
    def lexicalSynonym(self, term):
        """Return up to 10 related terms for *term* from ``BosonNLP.suggest``."""
        return self.bonlp.suggest(term, top_k=10)

    # Word segmentation and part-of-speech tagging
    def fenci(self, s):
        """Return the raw result of ``BosonNLP.tag`` for *s*."""
        return self.bonlp.tag(s)

    def newssubstract(self, s):
        """Return the ``BosonNLP.summary`` of *s*, using an empty title.

        *s* may be text or UTF-8 encoded bytes; bytes are decoded first.
        """
        # Decode only real byte strings. Calling .decode() unconditionally
        # raises AttributeError for str on Python 3 and triggers an implicit
        # ASCII encode for unicode text on Python 2.
        if isinstance(s, bytes):
            s = s.decode('utf-8')
        return self.bonlp.summary('', s)
class _BosonNLPWrapper(object):
    """
    NLP object using the BosonNLP API Python SDK.
    """

    # English label for each category id; the list index is the id that
    # ``BosonNLP.classify`` returns.
    news_categories = ['physical education', 'education', 'finance', 'society', 'entertainment', 'military',
                       'domestic', 'science and technology', 'the internet', 'real estate', 'international',
                       'women', 'car', 'game']

    def __init__(self, api_token=None):
        """
        Creates the underlying BosonNLP client.

        Parameters
        ----------
        api_token (string): BosonNLP API token (required)

        Raises
        ------
        AssertionError if api_token is None
        """
        # Raised explicitly rather than through an ``assert`` statement, which
        # is stripped when Python runs with -O. The exception type is kept so
        # callers that already catch AssertionError keep working.
        if api_token is None:
            raise AssertionError("Please provide an API token")
        self.token = api_token
        self.nlp = BosonNLP(self.token)

    def get_sentiment(self, text):
        """
        Performs sentiment analysis on a text passage (works for Chinese text).

        See: http://docs.bosonnlp.com/sentiment.html

        Parameters
        ----------
        text (string): text passage to be analyzed for sentiment

        Returns
        -------
        dictionary with 'positive' and 'negative' as keys with their respective weights as values

        >>> nlp = BosonNLPWrapper('')
        >>> nlp.get_sentiment('不要打擾我')
        {'positive': 0.3704911989140307, 'negative': 0.6295088010859693}
        >>> nlp.get_sentiment('我很高興跟你見面')
        {'positive': 0.856280735624867, 'negative': 0.14371926437513308}
        """
        # Only the first result of the API response is used.
        pos, neg = self.nlp.sentiment(text)[0]
        return {'positive': pos, 'negative': neg}

    def classify_news(self, text):
        """
        Classifies news text into 14 different categories.

        See: http://docs.bosonnlp.com/classify.html

        Parameters
        ----------
        text (string): text passage to classify into news categories defined in news_categories

        Returns
        -------
        one of the 14 categories in news_categories that the text was classified into

        Raises
        ------
        KeyError if the returned id has no entry in news_categories
        """
        # A dict lookup (not list indexing) so an unexpected id fails with
        # KeyError instead of silently wrapping around on a negative value.
        cats_dict = dict(enumerate(_BosonNLPWrapper.news_categories))
        clsfy_num = self.nlp.classify(text)[0]
        return cats_dict[clsfy_num]

    def extract_keywords(self, text, top_k=3):
        """
        Extracts the top k keywords and the weight of each word in the text.

        See: http://docs.bosonnlp.com/keywords.html

        Parameters
        ----------
        text (string): text passage from which to extract keywords
        top_k (integer): number of keywords to return

        Returns
        -------
        list of key-value pairs {word: weight}

        >>> nlp = BosonNLPWrapper('')
        >>> nlp.extract_keywords('我最愛老虎堂,奶茶香醇,波霸彈Q 好香的黑糖味')
        [{'波霸彈': 0.5980681967308248}, {'黑糖': 0.4699792421671365}, {'香醇': 0.4497614275300947}]
        """
        result = self.nlp.extract_keywords(text, top_k)  # outputs in sorted order of weight
        # Each API entry is indexed as (weight, word); flip it to {word: weight}.
        return [{entry[1]: entry[0]} for entry in result]

    def segment_words_and_tag(self, text):
        """
        Splits up text into segments of "words" and tags them with their respective part of speech.

        See: http://docs.bosonnlp.com/tag.html

        Parameters
        ----------
        text (string): text passage to segment into separate "words" and tags them with parts of speech

        Returns
        -------
        list of key-value pairs {word: part-of-speech-tag}
        """
        result = self.nlp.tag(text)[0]
        # 'word' and 'tag' are parallel lists; pair them up position by position.
        return [{word: tag} for word, tag in zip(result['word'], result['tag'])]

    def get_summary(self, content, title='', pct_limit=0.2):
        """
        Extracts a new digest (summary) of the content.

        See: http://docs.bosonnlp.com/summary.html

        Parameters
        ----------
        content (string): text passage to summarize
        title (string): title of the passage (optional, may provide more accurate results)
        pct_limit (float): max length of the summary in terms of percentage of the original word count

        Returns
        -------
        string containing the summary of the passage
        """
        # The SDK call takes the title first, then the content.
        return self.nlp.summary(title, content, pct_limit)