Example #1
def Analyzer(s):
    # Map jiagu's (label, probability) pair onto a single signed score:
    # positive -> +probability, negative -> -probability.
    try:
        ls = jiagu.sentiment(s)
    except Exception:
        # Fall back to a neutral score if jiagu fails on this input.
        ls = ['positive', 0.5]
    if ls[0] == 'positive':
        result = ls[1]
    else:
        result = -ls[1]
    return result
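A quick usage sketch (assuming jiagu is imported as in the rest of these examples; the sentences and printed values are illustrative):

import jiagu  # required by Analyzer above

print(Analyzer('这部电影太好看了'))  # positive label -> positive score, e.g. 0.9...
print(Analyzer('很讨厌还是个懒鬼'))  # negative label -> sign flipped, e.g. -0.9...
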
def emotions():
    text = []
    with open(path + "如何看待4月3日全球新冠确诊人数突破100万_我们需要多久才能控制住疫情_.csv", encoding='gbk', errors='ignore') as file:
        for line in file:
            line = line.split(',', 1)  # split only on the first comma
            text.append(line[1])
            # print(line[1])  # inspect the data

    # print(len(text))  # length: 100001

    with open(path + '如何看待4月3日全球新冠确诊人数突破100万_我们需要多久才能控制住疫情_.txt', 'w', encoding='utf-8') as f:
        for i in range(len(text)):
            sentiment = jiagu.sentiment(text[i])
            f.write(str(sentiment) + '\n')
def senticontent(chapter_id):
    contents_sentences = getContentSentence(chapter_id)

    # Analyze the chapter only once: skip if results already exist for it.
    if not SentiContent.query.filter_by(
            sentence_id=contents_sentences[0].id).all():
        for contents_sentence in contents_sentences:
            sentiment = jiagu.sentiment(contents_sentence.sentenceseg)
            senticontent = SentiContent(senti=sentiment[0],
                                        degree=sentiment[1],
                                        sentence_id=contents_sentence.id)
            db.session.add(senticontent)
        db.session.commit()  # persist the new records

    senticontents = [{
        'sentence': contents_sentence.sentenceseg,
        'sentiment': senticontent.senti,
        'degree': senticontent.degree
    } for contents_sentence in contents_sentences
      for senticontent in contents_sentence.senti]

    return jsonify({'sentiments': senticontents}), 200
Example #4
def check_comment(url, video):
    # Column names follow the field order of Bilibili's danmaku `p` attribute.
    table_header = [
        'appearance', 'mode', 'font_size', 'color', 'send_time', 'pool',
        'sender_id', 'rowID', 'content', 'emotion'
    ]
    video_url = url
    video_html = open_url(video_url)
    danmu_id, video['title'], video['up'] = get_danmu_id(video_html, video_url)
    all_list = []
    video_df = pd.DataFrame(columns=table_header)  # empty fallback when no danmaku id is found
    if danmu_id:
        danmu_url = 'http://comment.bilibili.com/{}.xml'.format(danmu_id)
        danmu_html = open_url(url=danmu_url)
        soup = BS(danmu_html, 'html.parser')
        all_d = soup.select('d')
        for d in all_d:
            # Split the <d> tag's `p` attribute into its individual fields.
            danmu_list = d['p'].split(',')
            # d.get_text() is the danmaku (bullet comment) text itself.
            danmu_list.append(d.get_text())
            nature, value = jiagu.sentiment(danmu_list[8])
            if nature == 'negative':
                value = -value
            danmu_list.append(value)
            # danmu_list[0] = sec2str(danmu_list[0])
            # danmu_list[4] = time.ctime(eval(danmu_list[4]))
            all_list.append(danmu_list)
        df = pd.DataFrame(all_list, columns=table_header)
        video_df = df.iloc[:, [0, 7, 8, 9]]  # appearance, rowID, content, emotion
        bullet_screen_count = video_df.shape[0]
        key = 'id' if 'id' in video else 'aid'
        video_df.to_csv('screen_bullet/danmu_emotion/{}.csv'.format(video[key]))
        video['count'] = bullet_screen_count
    return video, danmu_id, video_df.iloc[:, 2]
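For context, the danmaku XML served by comment.bilibili.com wraps each comment in a <d> element whose p attribute packs eight comma-separated metadata fields; here is a minimal, self-contained sketch of the per-element step above (the sample element is fabricated for illustration):

from bs4 import BeautifulSoup as BS
import jiagu

sample = '<i><d p="23.8,1,25,16777215,1586000000,0,a1b2c3d4,29876543210">太好笑了吧</d></i>'
d = BS(sample, 'html.parser').select('d')[0]
fields = d['p'].split(',')    # the eight metadata fields (time, mode, size, ...)
fields.append(d.get_text())   # index 8: the comment text itself
nature, value = jiagu.sentiment(fields[8])
print(nature, value)          # e.g. positive 0.9...
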
Example #5
def text_sentiment_zh():
    """
    text sentiment for chinese
    ---
    tags:
        -   nlp
    parameters:
        -   in: query
            name: text
            type: string
            required: true
            default: 你真棒!
            description: text content
    responses:
        200:
            description: chinese sentiment response
            schema:
                type: object
                properties:
                    code:
                        type: integer
                        description: status code
                    sentiment:
                        type: string
                        enum: [negative, positive]
                    probability:
                        type: number

    """
    text = request.args.get("text")

    if text is None:
        raise ParameterLostError("sentiment_text")

    result = sentiment(text)

    return {
        "code": 200,
        "sentiment": result[0],
        "probability": result[1]
    }
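A quick way to exercise this endpoint (the host and mount path are assumptions; adjust them to wherever the handler is routed):

import requests

resp = requests.get(
    "http://localhost:5000/nlp/sentiment/zh",  # hypothetical URL; the route is not shown above
    params={"text": "你真棒!"},
)
print(resp.json())  # e.g. {'code': 200, 'sentiment': 'positive', 'probability': 0.9...}
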
Example #6
    def nlp_jiagu(self, btn):
        text = str(self.lbl.text.strip()).replace("\n", "")
        # Dispatch on a two-character suffix: qg = sentiment, cq = keywords,
        # jl = clustering; anything else falls through to knowledge extraction.
        if text[-2:] == "qg":
            sentiment = jiagu.sentiment(text[:-2])
            self.lbl.text = pprint.pformat(sentiment)
        elif text[-2:] == "cq":
            keywords = jiagu.keywords(text[:-2], 5)  # top-5 keywords
            self.lbl.text = pprint.pformat(keywords)
        elif text[-2:] == "jl":
            # Split on a full-width comma if present, otherwise on the ASCII comma.
            body = text[:-2]
            docs = body.split(",") if "," in body else body.split(",")
            cluster = jiagu.text_cluster(docs)
            self.lbl.text = pprint.pformat(cluster)
        else:
            knowledge = jiagu.knowledge(text)
            self.lbl.text = pprint.pformat(knowledge)
Example #7
import jiagu

text = '''
据观察者网过往报道,2017年我国全国共完成造林736.2万公顷、森林抚育830.2万公顷。其中,天然林资源保护工程完成造林26万公顷,退耕还林工程完成造林91.2万公顷。京津风沙源治理工程完成造林18.5万公顷。三北及长江流域等重点防护林体系工程完成造林99.1万公顷。完成国家储备林建设任务68万公顷。
'''

keywords = jiagu.keywords(text, 5)  # keyword extraction
print(keywords)

summarize = jiagu.summarize(text, 3)  # text summarization
print(summarize)

# jiagu.findword('input.txt', 'output.txt')  # new-word discovery over a large corpus using information entropy

# Knowledge-graph relation extraction
text = '姚明1980年9月12日出生于上海市徐汇区,祖籍江苏省苏州市吴江区震泽镇,前中国职业篮球运动员,司职中锋,现任中职联公司董事长兼总经理。'
knowledge = jiagu.knowledge(text)
print(knowledge)

# Sentiment analysis
text = '很讨厌还是个懒鬼'
sentiment = jiagu.sentiment(text)
print(sentiment)

# Text clustering (needs parameter tuning)
docs = [
    "百度深度学习中文情感分析工具Senta试用及在线测试", "情感分析是自然语言处理里面一个热门话题",
    "AI Challenger 2018 文本挖掘类竞赛相关解决方案及代码汇总", "深度学习实践:从零开始做电影评论文本情感分析",
    "BERT相关论文、文章和代码资源汇总", "将不同长度的句子用BERT预训练模型编码,映射到一个固定长度的向量上",
    "自然语言处理工具包spaCy介绍", "现在可以快速测试一下spaCy的相关功能,我们以英文数据为例,spaCy目前主要支持英文和德文"
]
cluster = jiagu.text_cluster(docs)
print(cluster)
Example #8
import jiagu
import pandas as pd
from opencc import OpenCC

# Traditional-to-Simplified conversion. Every row is currently converted on
# the fly when its sentiment score is computed, which is terrible for
# performance; ideally the Simplified text would be stored in a separate column.
cc = OpenCC('t2s')
txt = pd.read_csv("clean-txt-tokenized.csv")
# jiagu.sentiment returns a (label, probability) tuple per row; building the
# column in one pass avoids pandas' chained-assignment pitfall.
txt["sentiment"] = [jiagu.sentiment(cc.convert(s)) for s in txt["0"].astype(str)]

txt.to_csv("clean-txt-tokenized-sentiment.csv")
Example #9
async def doDetectSentiment(text: str = Form(...)):
    return {'status': True, 'data': jiagu.sentiment(text)[0] == 'positive'}
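As written, this is only the handler; below is a self-contained FastAPI sketch for trying it out. The app object, route path, and test client are assumptions added for illustration, and FastAPI's Form support additionally needs the python-multipart package:

from fastapi import FastAPI, Form
from fastapi.testclient import TestClient
import jiagu

app = FastAPI()

@app.post("/sentiment")  # hypothetical route; the original decorator is not shown
async def doDetectSentiment(text: str = Form(...)):
    # data is True when jiagu labels the text positive.
    return {'status': True, 'data': jiagu.sentiment(text)[0] == 'positive'}

client = TestClient(app)
print(client.post("/sentiment", data={"text": "你真棒!"}).json())
# e.g. {'status': True, 'data': True}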