from snownlp import SnowNLP


def analyze(sentence):
    # Map the SnowNLP sentiment probability (0..1) to an integer score:
    # positive text -> round(sentiments * 10), negative text -> -2 * round(sentiments * 10).
    s = SnowNLP(sentence)
    if round(s.sentiments) >= 0.5:
        return round(s.sentiments * 10)
    else:
        return round(s.sentiments * 10) * (-2)
Example #2
        query_id = []
        passage = []
        query = []
        answer = []
        alternatives = []
        passage_len = []
        query_in_char = []
        query_in_word = []
        query_in_char_set = []
        query_in_word_set = []
        ques_mark = []
        for i in range(0, len(lines)):
            l = lines[i]
            ge = json.loads(l)

            que = SnowNLP(ge.get('query', '')).han
            query.append(que)

            query_id.append(ge.get('query_id', ''))
            pas = SnowNLP(ge.get('passage', '')).han
            try:
                # keep only the text after the last Chinese question mark
                pas = pas.split('?')[-1].strip()
            except Exception:
                pass
            pas = pas.replace(que, '')

            if len(pas) < 2:
                pas = SnowNLP(ge.get('passage', '')).han
                print(ge.get('passage', ''))
                print(ge.get('query', ''))
                print(ge.get('answer', ''))
Example #3
def split_all():
    input_list_1 = 'list/data_location/mycode/china_loc_list_full.txt'
    input_list = 'list/data_location/mycode/china_prov_list_simple.txt'
    f_in = open(input_list, 'r')
    provs = f_in.readlines()
    f_in.close()
    provs = [prov.strip() for prov in provs]
    save_dir = './data/select_prov/'

    f = open('./list/stop_words.txt', 'r')
    stop_words = f.readlines()
    f.close()
    stop_words = [i.strip() for i in stop_words]

    count_all = {}
    for prov in provs:
        for year in xrange(2002, 2018):
            year = str(year)
            label = save_dir + year + '/' + ''.join(
                [str(i) for i in SnowNLP(unicode(prov)).pinyin])
            #print label
            year_dict = read_dict(label + '_all_count.txt')
            for key in year_dict:
                if key not in count_all:
                    count_all[key] = np.zeros(17, 'float')
                count_all[key][0] += year_dict[key]
                count_all[key][int(year) - 2001] += year_dict[key]

    for prov in provs:
        loc_counts = make_dict(input_list_1)
        word_counts = {}
        for year in xrange(2002, 2018):
            year = str(year)
            label = save_dir + year + '/' + ''.join(
                [str(i) for i in SnowNLP(unicode(prov)).pinyin])
            #print label
            year_dict = read_dict(label + '_all_count.txt')
            for key in loc_counts.keys():
                if key in year_dict:
                    loc_counts[key][0] += year_dict[key]
                    loc_counts[key][int(year) - 2001] = year_dict[key]
            for key in year_dict:
                if key in word_counts:
                    word_counts[key] += year_dict[key]
                else:
                    word_counts[key] = year_dict[key]
        for key in loc_counts.keys():
            for i in xrange(0, 13):
                if loc_counts[key][i] != 0:
                    loc_counts[key][i] /= count_all[key][i]


#        for key in word_counts.keys():
#            word_counts[key] /= count_all[key][0]
        print(prov)
        word_counts = sorted(word_counts.items(),
                             key=lambda x: x[1],
                             reverse=True)
        loc_counts = sorted(loc_counts.items(),
                            key=lambda x: x[1][0],
                            reverse=True)

        # _all_stop: all_count - stop_words
        f = open(
            save_dir + 'all/' +
            ''.join([str(i) for i in SnowNLP(unicode(prov)).pinyin]) +
            '_all_stop.txt', 'w')
        for k, v in word_counts:
            if k not in stop_words and len(k.decode()) > 1:
                f.write(k + ' ' + str(v) + '\n')
        f.close()

        ## _all_count: all_count(word, count)
        #f = open(save_dir+'all/'+''.join([str(i) for i in SnowNLP(unicode(prov)).pinyin])+'_all_count.txt', 'w')
        #for k,v in word_counts:
        #    if k not in stop_words:
        #        f.write(k+' '+str(v)+'\n')
        #f.close()

        ## _all_count_list: [word_in_list, all_count, 16*year_count]
        #f_1 = open(save_dir+'all/'+''.join([str(i) for i in SnowNLP(unicode(prov)).pinyin])+'_all_count_list.txt', 'w')
        #for k,v in loc_counts:
        #    f_1.write(k+' '+array_2_str(v)+'\n')
        #f_1.close()

    count_all = sorted(count_all.items(), key=lambda x: x[1][0], reverse=True)
    f_1 = open(save_dir + 'all_count.txt', 'w')
    for k, v in count_all:
        if k not in stop_words:
            f_1.write(k + ' ' + array_2_str(v.astype('int')) + '\n')
    f_1.close()
Example #4
def _sentiment(text):
    s = SnowNLP(text)
    return s.sentiments
Example #5
# -*- coding: utf-8 -*-

from snownlp import SnowNLP
import codecs


def load_sentences(path):

    with codecs.open(path, encoding='utf-8-sig') as f1:
        text = f1.read()

    return text.replace("\r", "").split("\n")


# `s` is a SnowNLP object built elsewhere in the original script (not shown in this excerpt)
for sent in s.sentences:
    print(sent, SnowNLP(sent).sentiments)


def test_new_word():

    new_word = {"positive_word": [], "negative_word": []}
    with codecs.open(Path1 + "new_word.txt", encoding="utf-8-sig") as f1:
        for line in f1.readlines():
            direction, word = line.replace("\r\n", "").split(" ")[:2]

            if direction == u"positive":
                new_word["positive_word"].append(word)

            if direction == u"negative":
                new_word["negative_word"].append(word)
Example #6
 def sentiment_analysis(text):
     snownlp = SnowNLP(text)
     return Comment.rank_sentiments(snownlp.sentiments)
Example #7
from snownlp import SnowNLP
text="皱皱的颜色。也不鲜艳。很喜欢。但总归。还是嫌弃。"
s = SnowNLP(text)
for i in s.sentences:
    #print(i)
    s1 = SnowNLP(i)
    #print(s1.sentiments)
text="皱皱的颜色也不鲜艳很喜欢但总归还是嫌弃"
a = SnowNLP(text)
print(text)
print(a.sentiments)
text="皱皱的颜色也不鲜艳很嫌弃但总归还是喜欢"
a = SnowNLP(text)
print(text)
print(a.sentiments)
Example #8
def NewsProcess(data):
    s = SnowNLP(data)
    print(s.words)
    print("##" + str(s.sentiments))
Example #9
File: test.py, Project: swagglian/snownlp
#     sents = normal.get_sentences(t)
#     doc = []
#     for sent in sents:
#         words = seg.seg(sent)
#         words = normal.filter_stop(words)
#         doc.append(words)
#     rank = textrank.TextRank(doc)
#     rank.solve()
#     for index in rank.top_index(5):
#         print(sents[index])
#     keyword_rank = textrank.KeywordTextRank(doc)
#     keyword_rank.solve()
#     for w in keyword_rank.top_index(5):
#         print(w)
import xlrd
import numpy as np
import matplotlib.pyplot as plt
from snownlp import SnowNLP

sentimentslist = []
data = xlrd.open_workbook('MOBICK.xlsx')
table = data.sheets()[0]
nrows = table.nrows
for i in range(nrows):
    # join all cells of the row into one string, then score it
    row_text = "".join('%s' % cell for cell in table.row_values(i))
    s = SnowNLP(row_text)
    print(table.row_values(i), s.sentiments)
    sentimentslist.append(s.sentiments)
fileObject = open('MOBICK.txt', 'w+')
for ip in sentimentslist:
    fileObject.write(str(ip))
    fileObject.write('\n')
fileObject.close()
fig1 = plt.figure("sentiment")
plt.hist(sentimentslist, bins=np.arange(0, 1, 0.02))
plt.show()
Example #10
def comment_analysis(pagedata):
    jsondata = str(pagedata)
    df = pd.DataFrame(eval(jsondata))
    df = df.drop_duplicates()
    # Construct features
    ud = []
    ff = []
    fo = []
    for i in range(0, len(df)):
        try:
            #trim Comments
            s = df.loc[i, "Comments"]
            df.loc[i, "Comments"] = s[1:len(s) - 1]
            #drop rows with pure repost
            if (df.loc[i, "Comments"] == "转发微博"):
                df = df.drop(i)
            else:
                if ("回复@" in df.loc[i, "Comments"]):
                    s = df.loc[i, "Comments"]
                    df.loc[i, "Comments"] = s[s.index(':') + 1:len(s)]

                #label user description attribute
                if (str(df.loc[i, "User_description"]) == "nan"):
                    ud.append(0)
                else:
                    ud.append(1)

                #label verified user attribute
                if (df.loc[i, "Verified"] == 0):
                    df.loc[i, "Verified"] = 1
                elif (df.loc[i, "Verified"] == -1):
                    df.loc[i, "Verified"] = 0

                #calculate follower/following and follower/originalPost ratio
                if df.loc[i, "Follower"] == 0:
                    df.loc[i, "Follower"] = 1
                ff.append(df.loc[i, "Following"] / df.loc[i, "Follower"])
                fo.append(df.loc[i, "Original_post"] / df.loc[i, "Follower"])
        except KeyError:
            ud.append(-1)
            ff.append(-1)
            fo.append(-1)

    df = df.reset_index(drop=True)
    #add features from post/user information
    df['description'] = ud
    df['ffRatio'] = ff
    df['foRatio'] = fo
    for i in range(0, len(df)):
        if df.loc[i, "description"] == -1:
            df = df.drop(i)
            ud.remove(-1)
            ff.remove(-1)
            fo.remove(-1)

    df = df.reset_index(drop=True)

    #Append Sentiment Score
    pynlpir.open()
    segPosts = []
    sentiScore = []
    translator = Translator()
    r = '[’!?:;【】,《》!"#$%&\'()()“”…*+,-./:;<=>?@[\\]^_`{|}~]+'
    #Append Sentiment Score for all Comments under original post
    for i in range(0, len(df)):
        df.loc[i, "Comments"] = re.sub(r, '', df.loc[i, "Comments"])
        try:
            if "en" in str(detect(df.loc[i, "Comments"])):
                transText = str(
                    translator.translate(df.loc[i, "Comments"],
                                         src='en',
                                         dest='zh-cn').text)
                line = transText.strip()
                s = SnowNLP(line)
                senti = (s.sentiments +
                         SnowNLP(re.sub("[0-9]", "", line)).sentiments) / 2
                sentiScore.append(senti)
                seg = pynlpir.segment(line, pos_tagging=False)
                segPosts.append(seg)
            elif "zh-cn" or "zh-tw" or "ko" in str(
                    detect(df.loc[i, "Comments"])):
                line = df.loc[i, "Comments"].strip()
                s = SnowNLP(line)
                senti = (s.sentiments +
                         SnowNLP(re.sub("[0-9]", "", line)).sentiments) / 2
                sentiScore.append(senti)
                seg = pynlpir.segment(line, pos_tagging=False)
                segPosts.append(seg)
            else:
                #drop rows without valid sentiment scores
                print("error1")
                df = df.drop(i)
        except Exception:
            print("error2")
            df = df.drop(i)

    df = df.reset_index(drop=True)
    df['Sentiment'] = sentiScore
    df = df.drop('Follower', axis=1).drop('Following', axis=1).drop(
        'User_description',
        axis=1).drop('Original_post',
                     axis=1).drop("Comments", axis=1).drop("Username",
                                                           axis=1).drop("UID",
                                                                        axis=1)
    pd.set_option('display.max_rows', 400)
    df = df.round({'Sentiment': 5})

    return df
Example #11
def senti(x):
    return (x[0], [SnowNLP(x[1]).sentiments, 1])
Example #12
File: jd_pre.py, Project: lino546464/jd_nlp
    plt.xlabel('Epochs')
    plt.ylabel('Loss and Acc')
    plt.legend()
    #plt.show()

    pre_y = model.predict(pre_x, batch_size=32)
    pre_y_class = model.predict_classes(pre_x, batch_size=32)
    pre_y = [round(x[0], 3) for x in pre_y]
    pre_y_class = [x[0] for x in pre_y_class]
    print(pre_y[:10])
    print(pre_y_class[:10])

    # Keyword sentiment classification
    star = []
    for it in x_pre:
        s = SnowNLP(it)
        t = s.sentiments
        star.append(round(t, 3))
    print(star[:10])

    ## Aggregate
    star_class = np.array(star)
    star_class = np.where(star_class >= 0.5, 1, 0).tolist()
    x_pre = x_pre.tolist()
    y_label = y_label.tolist()
    # pre_y = pre_y.tolist()
    # pre_y_class = pre_y_class.tolist()

    # Validate model accuracy
    acc1 = accuracy_score(y_label, pre_y_class)
    acc2 = accuracy_score(y_label, star_class)
Example #13
def fenci(string):
    s = SnowNLP(string)
    #result = s.words
    #print(len(result), '/'.join(result))  # .words: word segmentation

    print('Sentiments:', s.sentiments)  # .sentiments: sentiment analysis
Example #14
from snownlp import SnowNLP

text = '我来到河北保定河北大学上学'
s = SnowNLP(text)
print(s.words)
Example #15
 def snownlp_segment(self, sentence):
     # SnowNLP word segmentation
     # unicode_sentence = sentence.decode('gbk')
     sentence = SnowNLP(sentence).words
     return ' '.join(sentence)
Example #16
(7) Keyword and summary extraction from text (TextRank algorithm)

(8) Term frequency (TF) and inverse document frequency (IDF)

(9) Tokenization (splitting text into sentences)

(10) Text similarity (BM25)

SnowNLP's biggest strength is how easy it is to pick up, and it yields quite a few interesting results when processing Chinese text, but many of its features are fairly basic and still need further refinement.
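Features (7) to (10) are not demonstrated in the cells below, so here is a minimal sketch of them using SnowNLP's public API; the sample sentence and the toy tokenized corpus are only illustrative.

from snownlp import SnowNLP

doc = SnowNLP(u'杭州西湖风景很好,是旅游胜地,每年吸引大量前来游玩的游客!')
# (7) TextRank-based keywords and summary
print(doc.keywords(3))
print(doc.summary(2))
# (9) Tokenization into sentences
print(doc.sentences)

# (8) TF / IDF and (10) BM25 similarity operate on a list of tokenized documents
corpus = SnowNLP([[u'西湖', u'风景'], [u'旅游', u'胜地'], [u'游客']])
print(corpus.tf)
print(corpus.idf)
print(corpus.sim([u'风景']))  # BM25 score of the query against each document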


# In[10]:


from snownlp import SnowNLP
s=SnowNLP(u'杭州西湖风景很好,是旅游胜地,每年吸引大量前来游玩的游客!')
# Word segmentation
print(s.words)


# In[11]:

# Sentiment polarity computation
print("该文本的情感词性为正的概率:" + str(s.sentiments))


# In[12]:

_s=SnowNLP(u'今天又是下雨又是刮风,真是糟糕透了!')
print("该文本的情感词性为正的概率:" + str(_s.sentiments))
Example #17
# -*- coding: utf-8 -*-
import pandas as pd
from snownlp import SnowNLP

if __name__ == '__main__':
    test = pd.read_csv(r"TestModel.csv")
    review_list = [review for review in test['review']]
    label_list = [label for label in test['label']]
    list_test = [(label, review)
                 for label, review in list(zip(label_list, review_list))
                 if type(review) != float]

    for j in list_test:
        print(j[1], j[0], SnowNLP(j[1]).sentiments)

    senti = [SnowNLP(review).sentiments for label, review in list_test]

    newSenti = []
    for i in senti:  # predicted probability of the positive class; >= 0.6 is treated as a positive review
        if (i >= 0.6):
            newSenti.append(1)
        else:
            newSenti.append(0)

    counts = 0
    for i in range(len(list_test)):
        if (newSenti[i] == list_test[i][0]):
            counts += 1

    accuracy = float(counts) / float(len(list_test))
    print("准确率为:%.2f" % accuracy)
Example #18
def qingxu_number(text):
    s = SnowNLP(text)
    print('本句情绪指数:%f' % s.sentiments)
Example #19
"""
Sentiment analysis of Chinese text
@Date 2020.04.28
pip3 install snownlp
"""

from snownlp import SnowNLP

text = '人们日常所犯最大的错误,是对陌生人太客气,而对亲密的人太苛刻了,而努力改掉这个习惯。'
s = SnowNLP(text)
# Word segmentation
print(s.words)
# Part-of-speech tagging
tags = [x for x in s.tags]
print(tags)
# Sentence splitting
print(s.sentences)
# Pinyin
print(s.pinyin)

# Sentiment judgment: the return value is the probability of positive sentiment; closer to 1 means positive, closer to 0 means negative
text1 = '这部电影太棒了'
text2 = '这部电影简直是烂到爆'
s1 = SnowNLP(text1)
s2 = SnowNLP(text2)
# 这部电影太棒了 0.9829468270441747
print(text1, s1.sentiments)
# 这部电影简直是烂到爆 0.2296519477554504
print(text2, s2.sentiments)

# Keyword extraction
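The example stops at the comment above; a minimal, hypothetical sketch of the announced keyword extraction, reusing the `s` object built from `text` earlier in this example:

print(s.keywords(5))  # top 5 keywords, extracted with TextRank
print(s.summary(3))   # a 3-sentence TextRank summary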
Example #20
# coding=utf8
from snownlp import sentiment
from snownlp import SnowNLP

s = SnowNLP(u"这个东西真赞")

print(s.sentiments)
Example #21
def Txtmine(indata):
    data = indata[6]
    title = indata[2]
    dresult["title"] = title
    dresult["article"] = data
    mylen = len(data)
    line_num = data.count("\n")
    """字数行数"""
    dresult["wordnums"] = mylen
    dresult["linenums"] = line_num
    """词频"""
    words = jieba.lcut(data)
    counts = {}
    for word in words:
        if len(word) == 1:
            continue
        elif word.isdigit():
            continue
        else:
            rword = word
        counts[rword] = counts.get(rword, 0) + 1

    items = list(counts.items())
    items.sort(key=lambda x: x[1], reverse=True)
    dresult["words"] = items[0:3]
    """情感"""
    s = SnowNLP(data)
    dresult["sentiments"] = s.sentiments
    """摘要"""
    s = SnowNLP(data.replace("不", ""))
    # print(s.keywords())
    swords = list()
    for word in s.keywords():
        if len(word) < 2:
            continue
        else:
            swords.append(word)
    dresult["keywords"] = swords
    summary = list(set(s.summary()))[:3:1]
    # print(summary)
    lines = ""
    for line in summary:
        # print(line)
        lines = lines + line + " //// "
    dresult["summary"] = lines
    """特征值"""
    values = [i for i in indata[7:]]
    dresult["val"] = values
    """超链接数目"""
    results = re.findall("(?isu)(https\://[a-zA-Z0-9\.\?/&\=\:]+)", data)
    dresult["urlnums"] = len(results)
    """错字检测"""
    r = requests.post("http://www.cuobiezi.net/api/v1/zh_spellcheck/json",
                      data={
                          'content': '我最喜欢的就是元啊节吃汤圆。 ',
                          'check_mode': 'advanced',
                          'action': 'show'
                      })
    """"""
    loc = 0
    org = 0
    peo = 0
    words, ners = fool.analysis(data)
    for ner in ners[0]:
        if ner[2] == "location":
            loc += 1
        elif ner[2] == "org":
            org += 1
        elif ner[2] == "person":
            peo += 1
        else:
            continue
    dresult["loc"] = loc
    dresult["org"] = org
    dresult["peo"] = peo

    # print(dresult)

    return dresult
Example #22
# coding:utf-8
# SnowNLP is a Python library for processing Chinese text.
# It can be used for word segmentation, tagging, sentiment analysis, and more.
# Sentiment analysis simply splits text into two classes, positive and negative,
# and returns a sentiment probability: closer to 1 is positive, closer to 0 is negative. Code below:


import numpy as np
from snownlp import SnowNLP
import matplotlib.pyplot as plt

f = open('avengers.txt', 'r')
lines = f.readlines()
sentimentslist = []
for line in lines:
    s = SnowNLP(line)
    # print(s.sentiments)
    sentimentslist.append(s.sentiments)
plt.hist(sentimentslist, bins=np.arange(0, 1, 0.01), facecolor='g')
plt.xlabel('Sentiments Probability')
plt.ylabel('Quantity')
plt.title('Analysis of Sentiments')
plt.show()
Example #23
def process_data_8(text_name, gt_name, save_name, simple):
    data = {}
    with open(text_name, 'r', encoding='utf-8') as f:
        for line in f.readlines():
            line = line.strip()
            id = line.split()[0][5:-1]
            text = ''.join(line.split()[1:])
            if simple:
                if len(text) != len(SnowNLP(text).han):
                    print(text)
                    han = list(SnowNLP(text).han)
                    new_han = []
                    k = 0
                    for i in range(len(han)):
                        if i < k:
                            continue
                        if i+3 < len(han) and han[i:i+4] == ['公', '共', '汽', '车']:
                            new_han += ['公', '车']
                            k = i + 4
                        else:
                            new_han += han[i]
                    new_text = ''.join(new_han)
                    if len(text) != len(new_text):
                        han = list(new_text)
                        new_han = []
                        k = 0
                        for i in range(len(han)):
                            if i < k:
                                continue
                            if i + 2 < len(han) and han[i:i + 3] == ['出', '租', '车']:
                                new_han += ['的', '士']
                                k = i + 3
                            else:
                                new_han += han[i]
                        new_text = ''.join(new_han)
                        print(new_text)
                        if len(text) != len(new_text):
                            han = list(new_text)
                            new_han = []
                            k = 0
                            for i in range(len(han)):
                                if i < k:
                                    continue
                                if i + 2 < len(han) and han[i:i + 3] == ['因', '特', '网']:
                                    new_han += ['网', '际', '网', '络']
                                    k = i + 3
                                else:
                                    new_han += han[i]
                            new_text = ''.join(new_han)
                            print(new_text)
                    assert len(text) == len(new_text)
                    text = new_text
                else:
                    text = SnowNLP(text).han
            data[id] = {'text': text}
    with open(gt_name, 'r', encoding='utf-8') as f:
        for line in f.readlines():
            line = line.strip().split(', ')
            answer = []
            # extract wrong word id and replaced word
            if line[1] == '0':
                data[line[0]]['answer'] = []
            else:
                for i in range(1, len(line), 2):
                    if simple:
                        line[i + 1] = SnowNLP(line[i+1]).han
                    answer.append([int(line[i]), line[i+1]])
                data[line[0]]['answer'] = answer
    with open(save_name, 'w', encoding='utf-8') as f:
        json.dump(data, f)
Example #24
File: nlp.py, Project: fribble186/barcode
from snownlp import SnowNLP

text1 = '垃圾东西,很难吃'


def rank_sentiments(sentiments):
    if sentiments >= 0.8:
        return 5
    elif sentiments >= 0.6:
        return 4
    elif sentiments >= 0.4:
        return 3
    elif sentiments >= 0.2:
        return 2
    else:
        return 1

s1 = SnowNLP(text1)


print(rank_sentiments(s1.sentiments))
Example #25
def split_all():
    input_list_1 = 'list/bag.txt'
    input_list = 'list/data_location/mycode/china_prov_list_simple.txt'
    f_in = open(input_list, 'r')
    provs = f_in.readlines()
    f_in.close()
    provs = [prov.strip() for prov in provs]
    save_dir = './data/select_prov/'

    f = open('./list/stop_words.txt', 'r')
    stop_words = f.readlines()
    f.close()
    stop_words = [i.strip() for i in stop_words]

    for prov in provs:
        loc_counts = make_dict(input_list_1)
        word_counts = {}
        for year in xrange(2002, 2018):
            year = str(year)
            label = save_dir + year + '/' + ''.join(
                [str(i) for i in SnowNLP(unicode(prov)).pinyin])
            #print label
            year_dict = read_dict(label + '_count_search.txt')
            for key in loc_counts.keys():
                for key_split in key.split():
                    if key_split in year_dict:
                        loc_counts[key][0] += year_dict[key_split]
                        loc_counts[key][int(year) -
                                        2001] = year_dict[key_split]
            for key in year_dict:
                for key_split in key.split():
                    if key in word_counts:
                        word_counts[key] += year_dict[key_split]
                    else:
                        word_counts[key] = year_dict[key_split]
        print(prov)
        word_counts = sorted(word_counts.items(),
                             key=lambda x: x[1],
                             reverse=True)
        loc_counts = sorted(loc_counts.items(),
                            key=lambda x: x[1][0],
                            reverse=True)

        # _all_stop: all_count - stop_words
        #f = open(save_dir+'all/'+''.join([str(i) for i in SnowNLP(unicode(prov)).pinyin])+'_all_stop.txt', 'w')
        #for k,v in word_counts:
        #    if k not in stop_words:
        #        f.write(k+' '+str(v)+'\n')
        #f.close()

        ## _all_count: all_count(word, count)
        #f = open(save_dir+'all/'+''.join([str(i) for i in SnowNLP(unicode(prov)).pinyin])+'_all_count.txt', 'w')
        #for k,v in word_counts:
        #    if k not in stop_words:
        #        f.write(k+' '+str(v)+'\n')
        #f.close()

        # _all_count_list: [word_in_list, all_count, 16*year_count]
        f_1 = open(
            save_dir + 'all/' +
            ''.join([str(i) for i in SnowNLP(unicode(prov)).pinyin]) +
            '_count_search_bag.txt', 'w')
        for k, v in loc_counts:
            f_1.write(k + ' ' + array_2_str(v) + '\n')
        f_1.close()
Example #26
def get_sentiment(word):
    text = u'{}'.format(word)
    s = SnowNLP(text)
    print(s.sentiments)
Example #27
import pymysql
from snownlp import SnowNLP
import matplotlib.pyplot as plt
import numpy as np
conn = pymysql.connect(host='127.0.0.1',
                       user='******',
                       password='',
                       charset='utf8')
with conn:
    cur = conn.cursor()
    cur.execute("SELECT * FROM sinaweibo.macomment")
    rows = cur.fetchall()
commentlist = []
for row in rows:
    row = list(row)
    del row[0]
    # only keep each comment text once
    if row[2] not in commentlist:
        commentlist.append(row[2])
conn.close()
print("Finish fetching the comment data...")
# print(commentlist)

# snowanalysis
print("Start SnowNLP data")
sentimentslist = []
for com in commentlist:
    s = SnowNLP(com)
    sentimentslist.append(s.sentiments)

# print(sentimentslist)
plt.hist(sentimentslist, bins=np.arange(0, 1, 0.02))
plt.show()
Example #28
from snownlp import SnowNLP
text = '手机质量非常好,非常漂亮!'
s = SnowNLP(text)

# print(s.words)
# tags = [x for x in s.tags]
# print(tags)

# print(s.sentiments)
# print(s.keywords(limit=20))

print(s.sentences)

for sen in s.sentences:
    t = SnowNLP(sen)
    tags = [x for x in t.tags]
    for k, v in tags:
        if (v == 'n'):
            print(k)
    print(tags)
Example #29
#encoding=utf-8
from snownlp import SnowNLP
"""汉字转拼音"""
s = SnowNLP(u'这个东西真心很赞')
print s.pinyin  # [u'zhe', u'ge', u'dong', u'xi',
#  u'zhen', u'xin', u'hen', u'zan']
Example #30
def saveComments():
    index = 0
    mongoClient = MongoClient('172.24.177.30', 27017)
    db = mongoClient['futures_data']
    posts = db['comments']
    sinanews = db['sinanews']
    for data in loadUrls():
        try:
            index += 1
            tempList = []
            url = data['url']
            id = data['id']
            print(url)

            url = getCUrl(url)
            request.urlopen(url)
            response = request.urlopen(url + '&page=1&page_size=200')
            jsonStr = response.read()

            print(jsonStr[9:])
            jsonObj = json.loads(jsonStr[9:])

            tempList.extend(jsonObj['result']['cmntlist'])

            count = int(jsonObj['result']['count']['show'])
            pageNum = math.ceil(count / 200)
            if pageNum > 1:
                for page in range(2, pageNum + 1):
                    response = request.urlopen(url +
                                               ('&page=%s&page_size=200' %
                                                page))
                    jsonStr = response.read()
                    jsonObj = json.loads(jsonStr[9:])
                    tempList.extend(jsonObj['result']['cmntlist'])
                    print('page: %s' % page)

            print(len(tempList))
            # break
            sentiments = []
            positive = 0
            negative = 0
            for temp in tempList:
                s = SnowNLP(temp['content'])
                sentiments.append(s.sentiments)
                if s.sentiments > 0.5:
                    positive += 1
                else:
                    negative += 1
            if len(sentiments) != 0:
                sentiment = np.mean(sentiments)
            else:
                sentiment = -1
            # saveObj = {'id': id, 'url': url, 'comment': tempList, 'sentiment': sentiment, 'positive': positive, 'negative': negative}
            print('第' + str(index) + '篇', sentiment, data['id'])
            # posts.insert_one(saveObj)
            sinanews.update(
                {'_id': ObjectId(data['id'])},
                {'$set': {
                    'positive': positive,
                    'negative': negative
                }})
        except Exception as e:
            print(e)