Example #1
from django.http import HttpResponse
from django.shortcuts import render
from snownlp import SnowNLP as nlp

from .models import sohu_comment  # assumed app-local import for the sohu_comment model


def sohu_cmt(request):
    # "off" is a module-level maintenance switch defined elsewhere in the app.
    if off:
        # The HTML reads: "Site suspended; see the project's GitHub page".
        return HttpResponse(
            "<h1>网站暂停使用, 项目 <a href=\"http://tofind.space/NCspider/\">Github 主页</a> </h1>"
        )
    try:
        # return HttpResponse(str(sohu_comment.objects.filter(news_id=comment_id).values("author")))  # kept for debugging
        comment_id = request.GET.get('comment_id')
        news_title = request.GET.get('title')
        latest_comments = sohu_comment.objects.filter(news_id=comment_id)[:50]
        attitude_tendency = []
        # Five-step colour scale, from very negative to very positive.
        rgb_list = ["#565D8E", "#499AA1", "#FFFFFF", "#F9E063", "#BBDC37"]
        for com in latest_comments:
            s = nlp(com.comment)
            senti_number = s.sentiments
            attitude_tendency.append(senti_number)
            # Bucket the sentiment score (0..1) into one of the five colour levels.
            rgb_level = 2
            if 0.7 < senti_number < 0.8:
                rgb_level = 3
            elif senti_number >= 0.8:
                rgb_level = 4
            elif 0.25 < senti_number < 0.35:
                rgb_level = 1
            elif senti_number <= 0.25:
                rgb_level = 0
            # senti_rgb = str(hex(int(senti_number * 0xFFFFFF))).replace("0x", "#").upper()
            senti_rgb = rgb_list[rgb_level]
            com.sense_color = senti_rgb
        if len(attitude_tendency) > 0:
            tendency_number = sum(attitude_tendency) / len(attitude_tendency)
            # Labels: very negative, negative, neutral, positive, very positive.
            attitude_list = ["很消极", "消极", "中立", "积极", "很积极"]
            # Clamp so extreme averages cannot index past either end of the list.
            level = max(0, min(len(attitude_list) - 1, int(round(tendency_number * 10 / 2)) - 1))
            attitude = attitude_list[level]
        else:
            attitude = "不得而知"  # "unknown"
        return render(
            request, 'news_opin/sohu_comment_list.html', {
                'sohu_comments_list': latest_comments,
                'attitude': attitude + "?",
                'news_title': news_title,
            })
    except Exception as e:
        return render(request, 'news_opin/error_info.html', {
            'error_info': str(e),
        })
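
The score-to-colour bucketing in this view is easy to sanity-check outside Django. A minimal sketch with the thresholds lifted from the view above; the score_color helper is invented for illustration:

from snownlp import SnowNLP


def score_color(score):
    # Map a SnowNLP sentiment score in [0, 1] to one of five hex colours,
    # using the same thresholds as the view above.
    colors = ["#565D8E", "#499AA1", "#FFFFFF", "#F9E063", "#BBDC37"]
    level = 2  # neutral by default
    if 0.7 < score < 0.8:
        level = 3
    elif score >= 0.8:
        level = 4
    elif 0.25 < score < 0.35:
        level = 1
    elif score <= 0.25:
        level = 0
    return colors[level]


print(score_color(SnowNLP(u'这个新闻太棒了').sentiments))  # expect a positive colour
print(score_color(SnowNLP(u'毫无价值的报道').sentiments))  # expect a negative colour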
Example #2
import pandas as pd
from snownlp import SnowNLP as nlp

filePath = '../corpus/moods/喜悦.csv'  # the "joy" (喜悦) mood corpus

df = pd.read_csv(filePath).sample(1000)
count = 0
for index, row in df.iterrows():
    content = row['review']
    s = nlp(content)
    # print(s.sentiments)
    if s.sentiments < 0.7:
        # A "joy" sample scoring below 0.7 is counted as a misclassification.
        print(s.sentiments, content)
        count += 1
print(count / df.shape[0])
# Cross-check corpus quality with snownlp: accuracy on the "joy" corpus is only about 70%.
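
Since the stock model tops out around 70% on this corpus, one option is retraining SnowNLP's sentiment model on domain data. The train/save calls below are SnowNLP's documented API; 'neg.txt' and 'pos.txt' are placeholder paths, one sentence per line:

from snownlp import sentiment

# Retrain the Naive Bayes sentiment model on domain-specific examples.
sentiment.train('neg.txt', 'pos.txt')
# Persist the retrained model; point snownlp's sentiment data path at this
# file (see snownlp/sentiment/__init__.py) to use it.
sentiment.save('sentiment.marshal')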
Example #3
    @property
    def _snow(self):
        # Presumably a @property: the next example reads self._snow
        # as an attribute without calling it.
        return nlp(self.text)
Example #4
    def sent_senti(self):
        # filtered = filter(sentence_contains_brace, self._snow.sentences)
        sentences = self._snow.sentences
        # One sentiment score per sentence: {sentence: score in [0, 1]}.
        return {sentence: nlp(sentence).sentiments for sentence in sentences}
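
Examples #3 and #4 read like methods of the same text-wrapper class. A minimal sketch of how they might fit together; the Document class name is invented, while SnowNLP's sentences and sentiments attributes are real:

from snownlp import SnowNLP as nlp


class Document:
    # Hypothetical owner of the two methods above.
    def __init__(self, text):
        self.text = text

    @property
    def _snow(self):
        return nlp(self.text)

    def sent_senti(self):
        return {s: nlp(s).sentiments for s in self._snow.sentences}


doc = Document(u'今天天气很好。心情很差。')
print(doc.sent_senti())  # one score per sentence, keyed by the sentence text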
Example #5
    def checkDataNLP(self, factorDF=None, code_=None):
        """
        Score the sentiment polarity of scraped news text.
        :param factorDF: DataFrame with 'code', 'title', 'datas' and 'upTime' columns
        :param code_: stock code used when filling rows for missing dates
        :return: daily mean sentiment scores plus rolling means
        """
        content_title = self.contentReplace(factorDF['title'])  # strip blank lines / extra whitespace
        content_datas = self.contentReplace(factorDF['datas'])  # strip blank lines / extra whitespace
        # Sentiment score for each title; list(...) so the map is evaluated on Python 3.
        factorDF['title_Marking'] = list(map(
            lambda x: 0 if x == '0' or len(x) == 0 else float(nlp(x).sentiments),
            content_title))
        # Sentiment score for each article body.
        factorDF['datas_Marking'] = list(map(
            lambda x: 0 if x == '0' or len(x) == 0 else float(nlp(x).sentiments),
            content_datas))
        upTimeValues = factorDF['upTime'].values
        factorDF['upTime'] = list(map(self.filterDateTime, upTimeValues))
     #print "factorDF['upTime']==",factorDF['upTime']
     factorDFNew = factorDF[[
         'code', 'title_Marking', 'datas_Marking', 'upTime'
     ]]
     upTimeList = np.array(factorDFNew['upTime'].values)
     indexListNew = [
         '0' if len(n.strip()) < 10 else (n.strip()) for n in upTimeList
     ]
     factorDFNew['upTime'] = indexListNew
        # Group by day and average each day's scores.
        factorData_DF = pd.DataFrame(
            data=factorDFNew.values,
            columns=["code", "titleMarketing", "datasMarketing", "dateUpTime"])
        factorData_DF['dateUpTime'] = list(map(self.secondFilterDate,
                                               factorData_DF['dateUpTime']))
        # .copy() avoids pandas' SettingWithCopy warning on the drop() below.
        factorDFLast = factorData_DF[factorData_DF['dateUpTime'] != '0'].copy()
        code_list = factorDFLast['code'].values
        factorDFLast.drop('code', axis=1, inplace=True)
        resultDF = pd.DataFrame(
            index=factorDFLast['dateUpTime'].values,
            data=factorDFLast.values,
            columns=['titleMarketing', 'datasMarketing', 'dateUpTime'])
        resultDF['titleMarketing'] = list(map(float, resultDF['titleMarketing'].values))
        resultDF['datasMarketing'] = list(map(float, resultDF['datasMarketing'].values))
        resultDF.sort_values(by=['dateUpTime'], inplace=True)
        new_data = resultDF.groupby('dateUpTime').mean()
        newCount = len(new_data)
        new_data['code'] = code_list[:newCount]
        new_data['dateUpTime'] = new_data.index
        # Fill gaps against the historical trading calendar.
        lostDate = []  # collect every missing date, then fill them in one pass
        index_data = new_data.index
        for dt in self.indexDateList:
            if len(dt) < 10:
                continue
            dt = dt + " 00:00:00"
            if dt not in index_data:
                lostDate.append(dt)
        for dt in lostDate:
            new_data.loc[dt] = {
                'titleMarketing': 0,
                'datasMarketing': 0,
                'code': code_,
                'dateUpTime': dt
            }
        new_data.sort_values(by=['dateUpTime'], inplace=True)
        # set_index returns a new frame, so keep the result.
        new_data = new_data.set_index('dateUpTime')
        def recentre(x):
            # Keep scores above 0.5, leave exact zeros (missing text) alone,
            # and shift sub-0.5 scores below zero.
            return x if x > .5 else (0 if x == 0 else x - .5)

        new_data['titleMarketing'] = list(map(recentre, new_data['titleMarketing'].values))
        new_data['datasMarketing'] = list(map(recentre, new_data['datasMarketing'].values))
        # Rolling means of both scores over several window sizes.
        for col in ('title', 'datas'):
            for window in (5, 10, 20, 30):
                new_data['{}_Rolling{}_mean'.format(col, window)] = (
                    new_data['{}Marketing'.format(col)]
                    .rolling(min_periods=1, window=window).mean().values)
        # date__ = str(datetime.datetime.now()).replace(':', '').replace('.', '')
        # df.to_csv(r'{}_{}.csv'.format(columnName_, date__), encoding='utf_8_sig')
        # print('done writing')
        return new_data
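
The tail of checkDataNLP (daily mean, then rolling means) can be exercised on its own. A self-contained sketch of just that step; the toy frame stands in for the per-article scores above:

import pandas as pd

# Toy per-article sentiment scores with two articles on the first day.
df = pd.DataFrame({
    'dateUpTime': ['2020-01-01', '2020-01-01', '2020-01-02', '2020-01-03'],
    'titleMarketing': [0.9, 0.1, 0.6, 0.4],
})
daily = df.groupby('dateUpTime').mean(numeric_only=True)
for window in (5, 10, 20, 30):
    daily['title_Rolling{}_mean'.format(window)] = (
        daily['titleMarketing'].rolling(min_periods=1, window=window).mean())
print(daily)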
Example #6
from snownlp import SnowNLP as nlp

# Laughter: Latin letters vs Chinese characters, and longer repetitions.
s1 = nlp(u'hahaha')
s2 = nlp(u'哈哈哈')
s3 = nlp(u'哈哈哈哈哈哈')
s4 = nlp(u'哈哈哈哈哈哈哈哈哈')

print(s1.sentiments, s2.sentiments, s3.sentiments, s4.sentiments)

# Simple positive words and their negations.
s1 = nlp(u'开心')
s2 = nlp(u'快乐')
s3 = nlp(u'不开心')
s4 = nlp(u'不快乐')

print(s1.sentiments, s2.sentiments, s3.sentiments, s4.sentiments)

# Clearly positive vs clearly negative single words.
s1 = nlp(u'有趣')
s2 = nlp(u'无聊')
s3 = nlp(u'傻逼')
s4 = nlp(u'天使')

print(s1.sentiments, s2.sentiments, s3.sentiments, s4.sentiments)

# Four-character idioms and internet slang.
s1 = nlp(u'不知所云')
s2 = nlp(u'耗子尾汁')
s3 = nlp(u'不明觉厉')
s4 = nlp(u'阴阳怪气')

print(s1.sentiments, s2.sentiments, s3.sentiments, s4.sentiments)
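
SnowNLP's bundled sentiment model is a Naive Bayes classifier trained on e-commerce reviews, so out-of-vocabulary strings like the slang above still get a score: it reflects the model's prior plus incidental character matches, not meaning. A quick probe:

from snownlp import SnowNLP

# Nonsense and out-of-domain tokens still score; treat such values with suspicion.
for text in (u'qwerty', u'asdfgh', u'耗子尾汁'):
    print(text, SnowNLP(text).sentiments)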
