def Analyzer(s):
    # jiagu.sentiment returns a (label, probability) pair; fall back to a
    # neutral-ish default if the call fails.
    try:
        ls = jiagu.sentiment(s)
    except Exception:
        ls = ['positive', 0.5]
    if ls[0] == 'positive':
        result = ls[1]
    else:
        result = -ls[1]
    return result
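# A minimal usage sketch (not part of the original snippet): Analyzer folds
# jiagu.sentiment's (label, probability) pair into a single signed score, so a
# clearly negative sentence should come back as a value near -1.
if __name__ == '__main__':
    print(Analyzer('很讨厌还是个懒鬼'))  # expected: a negative float, e.g. around -0.99
    print(Analyzer('你真棒!'))           # expected: a positive float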
def emotions():
    text = []
    with open(path + "如何看待4月3日全球新冠确诊人数突破100万_我们需要多久才能控制住疫情_.csv",
              encoding='gbk', errors='ignore') as file:
        for line in file:
            line = line.split(',', 1)  # split only on the first comma
            text.append(line[1])
            # print(line[1])  # inspect the data
    # print(len(text))  # length: 100001
    with open(path + '如何看待4月3日全球新冠确诊人数突破100万_我们需要多久才能控制住疫情_.txt',
              'w', encoding='utf-8') as f:
        for i in range(len(text)):
            sentiment = jiagu.sentiment(text[i])
            f.write(str(sentiment) + '\n')
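# A small follow-up sketch (not part of the original snippet): read back the file
# that emotions() wrote, where every line is str((label, probability)), and tally
# the labels.
from ast import literal_eval
from collections import Counter

counts = Counter()
with open(path + '如何看待4月3日全球新冠确诊人数突破100万_我们需要多久才能控制住疫情_.txt',
          encoding='utf-8') as f:
    for line in f:
        label, prob = literal_eval(line.strip())
        counts[label] += 1
print(counts)  # e.g. Counter({'positive': ..., 'negative': ...})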
def senticontent(chapter_id):
    contents_sentences = getContentSentence(chapter_id)
    # Only score this chapter if its sentences have no SentiContent rows yet.
    if SentiContent.query.filter_by(
            sentence_id=contents_sentences[0].id).all() == []:
        for contents_sentence in contents_sentences:
            sentiment = jiagu.sentiment(contents_sentence.sentenceseg)
            senticontent = SentiContent(senti=sentiment[0],
                                        degree=sentiment[1],
                                        sentence_id=contents_sentence.id)
            db.session.add(senticontent)
        db.session.commit()  # persist the newly added rows before reading them back
    senticontents = [{
        'sentence': contents_sentence.sentenceseg,
        'sentiment': senticontent.senti,
        'degree': senticontent.degree
    } for contents_sentence in contents_sentences
      for senticontent in contents_sentence.senti]
    return jsonify({'sentiments': senticontents}), 200
def check_comment(url, video):
    table_header = [
        'appearance', '弹幕模式', '字号', '颜色', '发送时间', '弹幕池', '发送者id',
        'rowID', 'content', 'emotion'
    ]
    video_url = url
    video_html = open_url(video_url)
    danmu_id, video['title'], video['up'] = get_danmu_id(video_html, video_url)
    all_list = []
    if danmu_id:
        danmu_url = 'http://comment.bilibili.com/{}.xml'.format(danmu_id)
        danmu_html = open_url(url=danmu_url)
        soup = BS(danmu_html, 'html.parser')
        all_d = soup.select('d')
        for d in all_d:
            # Split the attributes packed into each <d> tag's "p" attribute.
            danmu_list = d['p'].split(',')
            # d.get_text() is the danmu (bullet-screen) text itself.
            danmu_list.append(d.get_text())
            nature, value = jiagu.sentiment(danmu_list[8])
            if nature == 'negative':
                value = -value
            danmu_list.append(value)
            # danmu_list[0] = sec2str(danmu_list[0])
            # danmu_list[4] = time.ctime(eval(danmu_list[4]))
            all_list.append(danmu_list)
            # print(danmu_list)
    # all_list.sort()
    df = pd.DataFrame(all_list, columns=table_header)
    video_df = df.iloc[:, [0, 7, 8, 9]]
    bullet_screen_count = video_df.shape[0]
    # danmu_emotion = video_df.to_dict(orient='records')
    if 'id' in video:
        id = 'id'
    else:
        id = 'aid'
    # dict_write(dict_content=danmu_emotion, path='screen_bullet/danmu_emotion/{}.csv'.format(video[id]))
    video_df.to_csv('screen_bullet/danmu_emotion/{}.csv'.format(video[id]))
    # video['danmu'] = danmu_emotion
    video['count'] = bullet_screen_count
    return video, danmu_id, video_df.iloc[:, 2]
def text_sentiment_zh():
    """
    text sentiment for chinese
    ---
    tags:
      - nlp
    parameters:
      - in: query
        name: text
        type: string
        required: true
        default: 你真棒!
        description: text content
    responses:
      200:
        description: chinese sentiment response
        schema:
          type: object
          properties:
            code:
              type: integer
              description: status code
            sentiment:
              type: string
              enum: [negative, positive]
            probability:
              type: number
    """
    text = request.args.get("text")
    if text is None:
        raise ParameterLostError("sentiment_text")
    result = sentiment(text)
    return {
        "code": 200,
        "sentiment": result[0],
        "probability": result[1]
    }
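# A hypothetical client call (the URL below is an assumption, not taken from the
# original service): the endpoint should return jiagu's sentiment label and
# probability wrapped in the JSON schema documented above.
import requests

resp = requests.get('http://localhost:5000/nlp/sentiment_zh', params={'text': '你真棒!'})
print(resp.json())  # e.g. {'code': 200, 'sentiment': 'positive', 'probability': 0.9...}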
def nlp_jiagu(self, btn):
    text = str(self.lbl.text.strip()).replace("\n", "")
    if text[-2:] == "qg":
        # Sentiment analysis
        sentiment = jiagu.sentiment(text[:-2])
        self.lbl.text = pprint.pformat(sentiment)
    elif text[-2:] == "cq":
        keywords = jiagu.keywords(text, 5)  # keyword extraction
        self.lbl.text = pprint.pformat(keywords)
    elif text[-2:] == "jl":
        # Text clustering: the documents are separated by either a full-width
        # or an ASCII comma.
        if "，" in self.lbl.text:
            docs = self.lbl.text.split("，")
        else:
            docs = self.lbl.text.split(",")
        # print(docs)
        cluster = jiagu.text_cluster(docs)
        self.lbl.text = pprint.pformat(cluster)
    else:
        knowledge = jiagu.knowledge(text)
        self.lbl.text = pprint.pformat(knowledge)
据观察者网过往报道,2017年我国全国共完成造林736.2万公顷、森林抚育830.2万公顷。其中,天然林资源保护工程完成造林26万公顷,退耕还林工程完成造林91.2万公顷。京津风沙源治理工程完成造林18.5万公顷。三北及长江流域等重点防护林体系工程完成造林99.1万公顷。完成国家储备林建设任务68万公顷。
'''

keywords = jiagu.keywords(text, 5)  # keyword extraction
print(keywords)

summarize = jiagu.summarize(text, 3)  # text summarization
print(summarize)

# jiagu.findword('input.txt', 'output.txt')  # new-word discovery over a large corpus using information entropy

# Knowledge-graph relation extraction
text = '姚明1980年9月12日出生于上海市徐汇区,祖籍江苏省苏州市吴江区震泽镇,前中国职业篮球运动员,司职中锋,现任中职联公司董事长兼总经理。'
knowledge = jiagu.knowledge(text)
print(knowledge)

# Sentiment analysis
text = '很讨厌还是个懒鬼'
sentiment = jiagu.sentiment(text)
print(sentiment)

# Text clustering (parameters need tuning)
docs = [
    "百度深度学习中文情感分析工具Senta试用及在线测试",
    "情感分析是自然语言处理里面一个热门话题",
    "AI Challenger 2018 文本挖掘类竞赛相关解决方案及代码汇总",
    "深度学习实践:从零开始做电影评论文本情感分析",
    "BERT相关论文、文章和代码资源汇总",
    "将不同长度的句子用BERT预训练模型编码,映射到一个固定长度的向量上",
    "自然语言处理工具包spaCy介绍",
    "现在可以快速测试一下spaCy的相关功能,我们以英文数据为例,spaCy目前主要支持英文和德文"
]
cluster = jiagu.text_cluster(docs)
print(cluster)
import jiagu
import pandas as pd
from opencc import OpenCC

# Used for traditional-to-simplified conversion. Each row is converted on the fly
# when its sentiment score is computed, which is bound to be slow; it would
# probably be better to store the simplified text in a separate column.
cc = OpenCC('t2s')

txt = pd.read_csv("clean-txt-tokenized.csv")
# jiagu.sentiment returns a (label, probability) tuple; store it as-is per row
# (apply avoids the chained-assignment pitfall of txt["sentiment"][i] = ...).
txt["sentiment"] = txt["0"].apply(lambda s: jiagu.sentiment(cc.convert(s)))
txt.to_csv("clean-txt-tokenized-sentiment.csv")
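# A sketch of the idea raised in the comment above (the column name "simplified"
# is made up here): convert to simplified Chinese once into its own column, then
# score that column, instead of converting inside every sentiment call.
txt["simplified"] = txt["0"].apply(cc.convert)
txt["sentiment"] = txt["simplified"].apply(jiagu.sentiment)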
async def doDetectSentiment(text: str = Form(...)):
    # Map jiagu's (label, probability) result to a boolean "is positive" flag.
    return {'status': True, 'data': jiagu.sentiment(text)[0] == 'positive'}
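# A minimal wiring sketch (the route path and app object are assumptions, not part
# of the original snippet): register the handler on a FastAPI app as a POST
# endpoint; Form(...) parsing additionally requires the python-multipart package.
from fastapi import FastAPI

app = FastAPI()
app.add_api_route('/sentiment', doDetectSentiment, methods=['POST'])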