def sohu_cmt(request): if off: return HttpResponse( "<h1>网站暂停使用, 项目 <a href=\"http://tofind.space/NCspider/\">Github 主页</a> </h1>" ) try: #return HttpResponse(str(sohu_comment.objects.filter(news_id=comment_id).values("author")))#留作DEBUG用 comment_id = request.GET.get('comment_id') news_title = request.GET.get('title') lastern_comments = sohu_comment.objects.filter(news_id=comment_id)[:50] attitude_tendency = [] for com in lastern_comments: s = nlp(com.comment) senti_number = s.sentiments attitude_tendency.append(senti_number) rgb_list = ["#565D8E", "#499AA1", "#FFFFFF", "#F9E063", "#BBDC37"] rgb_level = 2 if senti_number > 0.7 and senti_number < 0.8: rgb_level = 3 elif senti_number >= 0.8: rgb_level = 4 elif senti_number < 0.35 and senti_number > 0.25: rgb_level = 1 elif senti_number <= 0.25: rgb_level = 0 #senti_rgb = str(hex(int( senti_number * 0xFFFFFF))).replace("0x", "#").upper() senti_rgb = rgb_list[rgb_level] com.sense_color = senti_rgb if len(attitude_tendency) > 0: tendency_number = sum(attitude_tendency) / len(attitude_tendency) attitude_list = ["很消极", "消极", "中立", "积极", "很积极"] attitude = attitude_list[int(round(tendency_number * 10 / 2)) - 1] else: attitude = "不得而知" return render( request, 'news_opin/sohu_comment_list.html', { 'sohu_comments_list': lastern_comments, 'attitude': attitude + "?", 'news_title': news_title, }) except Exception as e: return render(request, 'news_opin/error_info.html', { 'error_info': str(e), })
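The score-to-bucket mapping inside the view is easier to test in isolation. A minimal, self-contained sketch of that mapping, pulled out into a function; the function name and the sample sentence are illustrative, not from the original code.

from snownlp import SnowNLP

RGB_LIST = ["#565D8E", "#499AA1", "#FFFFFF", "#F9E063", "#BBDC37"]

def sentiment_bucket(score):
    """Map a SnowNLP sentiment score in [0, 1] to one of five colour buckets,
    matching the branch structure of the view above."""
    if score <= 0.25:
        return 0  # very negative
    if score < 0.35:
        return 1  # negative
    if score <= 0.7:
        return 2  # neutral band
    if score < 0.8:
        return 3  # positive
    return 4      # very positive

# Example: colour one comment the same way the view does.
score = SnowNLP("这条新闻太棒了").sentiments
print(score, RGB_LIST[sentiment_bucket(score)])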
import pandas as pd
from snownlp import SnowNLP as nlp

filePath = '../corpus/moods/喜悦.csv'
df = pd.read_csv(filePath).sample(1000)
count = 0
for index, row in df.iterrows():
    content = row['review']
    s = nlp(content)
    # print(s.sentiments)
    if s.sentiments < 0.7:
        # Counted as a miss: a "joy" review scored below the positive threshold.
        print(s.sentiments, content)
        count += 1
print(count / df.shape[0])
# Cross-check corpus quality with SnowNLP: the printed value is the miss rate,
# so accuracy on the "joy" (喜悦) corpus comes out at only about 70%.
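The same cross-check generalises to any labelled corpus. A hedged sketch that wraps it in a reusable function, assuming CSVs with a `review` column like the one above; the function name, default threshold, and seed are illustrative.

import pandas as pd
from snownlp import SnowNLP

def misclassified_fraction(csv_path, positive=True, threshold=0.7, n=1000, seed=0):
    """Fraction of sampled reviews whose SnowNLP score falls on the wrong
    side of `threshold` for the corpus label."""
    df = pd.read_csv(csv_path).sample(n, random_state=seed)
    scores = df['review'].map(lambda t: SnowNLP(t).sentiments)
    wrong = (scores < threshold) if positive else (scores >= threshold)
    return wrong.mean()

# e.g. misclassified_fraction('../corpus/moods/喜悦.csv')  # ~0.3 per the check above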
@property
def _snow(self):
    # SnowNLP wrapper over the full text; sent_senti below accesses this
    # as an attribute, so it is exposed as a property.
    return nlp(self.text)
def sent_senti(self):
    # filtered = filter(sentence_contains_brace, self._snow.sentences)
    sentences = self._snow.sentences
    # Map each sentence to its own SnowNLP sentiment score (0 negative, 1 positive).
    return {sent: nlp(sent).sentiments for sent in sentences}
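For context, a minimal sketch of the kind of class these two methods appear to belong to; the class name, the `text` attribute, and the sample text are assumptions inferred from how the methods use `self`, not from the original source.

from snownlp import SnowNLP as nlp

class TextSentiment:
    """Hypothetical wrapper: holds raw text, exposes per-sentence sentiment."""

    def __init__(self, text):
        self.text = text

    @property
    def _snow(self):
        return nlp(self.text)

    def sent_senti(self):
        return {sent: nlp(sent).sentiments for sent in self._snow.sentences}

# Usage: score each sentence of a short review independently.
print(TextSentiment("东西很好。物流太慢了。").sent_senti())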
def checkDataNLP(self, factorDF=None, code_=None):
    """Score the sentiment polarity of scraped text.

    :param factorDF: DataFrame with 'code', 'title', 'datas' and 'upTime' columns
    :param code_: security code used to fill rows for missing trading days
    :return: daily-aggregated DataFrame of sentiment scores and rolling means
    """
    # NB: pd, np and nlp (SnowNLP) plus the helpers used below (contentReplace,
    # filterDateTime, secondFilterDate, indexDateList) come from the enclosing
    # module/class. The original used bare map() throughout; in Python 3 that
    # assigns an iterator, so list comprehensions / Series methods are used here.
    content_title = self.contentReplace(factorDF['title'])  # strip blank lines / extra whitespace
    content_datas = self.contentReplace(factorDF['datas'])  # strip blank lines / extra whitespace

    # Sentiment score for each title.
    factorDF['title_Marking'] = [
        0 if x == '0' or len(x) == 0 else float(nlp(x).sentiments)
        for x in content_title
    ]
    # Sentiment score for each body text.
    factorDF['datas_Marking'] = [
        0 if x == '0' or len(x) == 0 else float(nlp(x).sentiments)
        for x in content_datas
    ]

    upTimeValues = factorDF['upTime'].values
    factorDF['upTime'] = [self.filterDateTime(v) for v in upTimeValues]
    # print("factorDF['upTime']==", factorDF['upTime'])
    factorDFNew = factorDF[['code', 'title_Marking', 'datas_Marking', 'upTime']].copy()
    upTimeList = np.array(factorDFNew['upTime'].values)
    indexListNew = ['0' if len(n.strip()) < 10 else n.strip() for n in upTimeList]
    factorDFNew['upTime'] = indexListNew

    # Group by day and aggregate.
    factorData_DF = pd.DataFrame(
        data=factorDFNew.values,
        columns=["code", "titleMarketing", "datasMarketing", "dateUpTime"])
    factorData_DF['dateUpTime'] = factorData_DF['dateUpTime'].map(self.secondFilterDate)
    factorDFLast = factorData_DF[factorData_DF['dateUpTime'] != '0']
    code_list = factorDFLast['code'].values
    factorDFLast = factorDFLast.drop('code', axis=1)
    resultDF = pd.DataFrame(
        index=factorDFLast['dateUpTime'].values,
        data=factorDFLast.values,
        columns=['titleMarketing', 'datasMarketing', 'dateUpTime'])
    resultDF['titleMarketing'] = resultDF['titleMarketing'].astype(float)
    resultDF['datasMarketing'] = resultDF['datasMarketing'].astype(float)
    resultDF.sort_values(by=['dateUpTime'], inplace=True)
    new_data = resultDF.groupby('dateUpTime').mean()
    newCount = len(new_data)
    new_data['code'] = code_list[:newCount]
    new_data['dateUpTime'] = new_data.index

    # Fill rows for missing dates from the historical trading-day calendar.
    lostDate = []  # collect every missing date, then fill in one pass
    index_data = new_data.index
    for dt in self.indexDateList:
        if len(dt) < 10:
            continue
        dt = dt + " 00:00:00"
        if dt not in index_data:
            lostDate.append(dt)
    for dt in lostDate:
        new_data.loc[dt] = {
            'titleMarketing': 0,
            'datasMarketing': 0,
            'code': code_,
            'dateUpTime': dt
        }
    new_data.sort_values(by=['dateUpTime'], inplace=True)
    # The original called set_index() without keeping the result; assign it back.
    new_data = new_data.set_index('dateUpTime')

    # Recentre non-missing scores: keep scores above .5 as-is, shift the rest
    # down by .5, and leave the filled-in zeros at 0.
    new_data['titleMarketing'] = [
        x if x > .5 else (0 if x == 0 else x - .5)
        for x in new_data['titleMarketing'].values
    ]
    new_data['datasMarketing'] = [
        x if x > .5 else (0 if x == 0 else x - .5)
        for x in new_data['datasMarketing'].values
    ]

    # Rolling means over 5/10/20/30 trading days for both score series.
    for window in (5, 10, 20, 30):
        new_data['title_Rolling%d_mean' % window] = (
            new_data['titleMarketing'].rolling(min_periods=1, window=window).mean())
    for window in (5, 10, 20, 30):
        new_data['datas_Rolling%d_mean' % window] = (
            new_data['datasMarketing'].rolling(min_periods=1, window=window).mean())
    # date__ = str(datetime.datetime.now()).replace(':', '').replace('.', '')
    # df.to_csv(r'{}_{}.csv'.format(columnName_, date__), encoding='utf_8_sig')
    # print('写入完成')  # "write complete"
    return new_data
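The core of the method above (score texts with SnowNLP, average per day, smooth with rolling windows) is easier to see in isolation. A minimal sketch on a toy frame; the column names and sample rows are illustrative, not from the original data.

import pandas as pd
from snownlp import SnowNLP

df = pd.DataFrame({
    'title': ['大涨', '暴跌', '利好消息', '风险提示'],
    'upTime': ['2021-01-04', '2021-01-04', '2021-01-05', '2021-01-06'],
})
df['score'] = df['title'].map(lambda t: SnowNLP(t).sentiments)

# One mean sentiment per day, then a 5-day rolling mean over the daily series.
daily = df.groupby('upTime')['score'].mean()
print(daily.rolling(window=5, min_periods=1).mean())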
from snownlp import SnowNLP as nlp

# Repeated laughter: "haha" in pinyin vs. Chinese, at increasing lengths.
s1 = nlp('hahaha')
s2 = nlp('哈哈哈')
s3 = nlp('哈哈哈哈哈哈')
s4 = nlp('哈哈哈哈哈哈哈哈哈')
print(s1.sentiments, s2.sentiments, s3.sentiments, s4.sentiments)

# Plain polarity words: "happy", "joyful", "not happy", "not joyful".
s1 = nlp('开心')
s2 = nlp('快乐')
s3 = nlp('不开心')
s4 = nlp('不快乐')
print(s1.sentiments, s2.sentiments, s3.sentiments, s4.sentiments)

# Clearly loaded words: "interesting", "boring", an insult, "angel".
s1 = nlp('有趣')
s2 = nlp('无聊')
s3 = nlp('傻逼')
s4 = nlp('天使')
print(s1.sentiments, s2.sentiments, s3.sentiments, s4.sentiments)

# Idioms and internet slang, where the model is on shakier ground.
s1 = nlp('不知所云')
s2 = nlp('耗子尾汁')
s3 = nlp('不明觉厉')
s4 = nlp('阴阳怪气')
print(s1.sentiments, s2.sentiments, s3.sentiments, s4.sentiments)
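Given the roughly 70% accuracy seen in the corpus check above, it can help to retrain SnowNLP's sentiment model (shipped trained on e-commerce reviews) on a domain corpus. A hedged sketch using snownlp's sentiment.train/save API; the file paths are placeholders.

from snownlp import sentiment

# neg.txt / pos.txt: one document per line, labelled by file (placeholder paths).
sentiment.train('neg.txt', 'pos.txt')
sentiment.save('sentiment.marshal')
# To use the new model, point snownlp at the saved file: the library reads the
# model path from data_path in snownlp/sentiment/__init__.py, so adjust it there.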