# Dictionary-based sentiment scoring: several implementations collected from
# different projects. All of them rely on a text_process module (tp) and on
# sentiment lexicons (posdict/negdict) loaded at module level (not shown in
# the source).
import text_process as tp

def sentence_score(sentence):
    final_score = []
    cuted_review = tp.cut_sentence(sentence)  # cut the text into sub-sentences
    for sent in cuted_review:
        seg_sent = tp.segmentation(sent)          # segment into words
        seg_sent = tp.del_stopwords(seg_sent)[:]  # drop stopwords
        i = 0         # position of the word currently scanned
        s = 0         # position just after the last sentiment word
        poscount = 0  # positive score for this sub-sentence
        negcount = 0  # negative score for this sub-sentence
        for word in seg_sent:
            if word in posdict:
                poscount += 1
                # weight the score by the degree adverbs between the previous
                # sentiment word and this one
                for w in seg_sent[s:i]:
                    poscount = match(w, poscount)
                s = i + 1
            elif word in negdict:
                negcount += 1
                for w in seg_sent[s:i]:
                    negcount = match(w, negcount)
                s = i + 1
            # an exclamation mark (full- or half-width) marks the end of the
            # clause: scan backwards for the nearest sentiment word and add 2
            elif word == "！" or word == "!":
                for w2 in seg_sent[::-1]:
                    if w2 in posdict:
                        poscount += 2
                        break
                    elif w2 in negdict:
                        negcount += 2
                        break
            i += 1
        final_score.append(transform_to_positive_num(poscount, negcount))
    # final processing: accumulate the scores of every sub-sentence
    pos_result, neg_result = 0, 0
    for res1, res2 in final_score:
        pos_result += res1
        neg_result += res2
    result = pos_result - neg_result  # final score
    return result
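# The helpers used above -- match() and transform_to_positive_num() -- are not
# shown in the source. A minimal sketch follows, assuming a HowNet-style degree
# lexicon; the word lists and weights here are illustrative assumptions, not
# the original project's values.

mostdict = {'最', '极其', '非常'}  # assumed extreme-degree words, weight 2.0
inversedict = {'不', '没', '无'}   # assumed negation words, weight -1.0

def match(word, sentiment_value):
    # scale the running sentiment score by the degree word in front of it
    if word in mostdict:
        sentiment_value *= 2.0
    elif word in inversedict:
        sentiment_value *= -1.0
    return sentiment_value

def transform_to_positive_num(poscount, negcount):
    # fold negative partial scores into the opposite polarity so that the
    # returned (positive, negative) pair is non-negative
    if poscount < 0 and negcount >= 0:
        negcount += abs(poscount)
        poscount = 0
    elif negcount < 0 and poscount >= 0:
        poscount += abs(negcount)
        negcount = 0
    elif poscount < 0 and negcount < 0:
        poscount, negcount = abs(negcount), abs(poscount)
    return poscount, negcount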
# Variant for review comments: the same clause-level scoring as
# sentence_score, but the final score is rounded to one decimal place.
def single_review_sentiment_score(comment_sent):
    single_review_senti_score = []
    cuted_review = tp.cut_sentence(comment_sent)  # split into clauses, analysed one by one
    for sent in cuted_review:
        seg_sent = tp.segmentation(sent)          # segment into words
        seg_sent = tp.del_stopwords(seg_sent)[:]  # drop stopwords
        i = 0         # position of the word currently scanned
        s = 0         # position just after the last sentiment word
        poscount = 0  # positive score for this clause
        negcount = 0  # negative score for this clause
        for word in seg_sent:                     # analyse word by word
            if word in posdict:                   # positive sentiment word
                poscount += 1
                for w in seg_sent[s:i]:           # apply the degree adverbs before it
                    poscount = match(w, poscount)
                s = i + 1                         # remember where the sentiment word was
            elif word in negdict:                 # negative sentiment word
                negcount += 1
                for w in seg_sent[s:i]:
                    negcount = match(w, negcount)
                s = i + 1
            # an exclamation mark marks the end of the clause: scan backwards
            # for the nearest sentiment word, add 2, then stop
            elif word == "！" or word == "!":
                for w2 in seg_sent[::-1]:
                    if w2 in posdict:
                        poscount += 2
                        break
                    elif w2 in negdict:
                        negcount += 2
                        break
            i += 1
        single_review_senti_score.append(
            transform_to_positive_num(poscount, negcount))
    # final processing: accumulate positive and negative totals over all clauses
    pos_result, neg_result = 0, 0
    for res1, res2 in single_review_senti_score:
        pos_result += res1
        neg_result += res2
    result = pos_result - neg_result  # final sentiment score of the comment
    result = round(result, 1)
    return result
# A weibo version of single_review_sentiment_score appeared here; it was
# identical to the function above except that the argument was a weibo post
# (weibo_sent) instead of a review comment.
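# Usage sketch for the clause-level scorer above; the sample sentence and the
# lexicon contents are illustrative assumptions.
sample = '这家店的服务非常好,强烈推荐!'
print(single_review_sentiment_score(sample))  # a positive review should score > 0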
# Django-backed variant: sentiment words live in a database table (Dict) and
# jieba's TF-IDF keyword weights amplify matched sentiment words.
import jieba
import jieba.analyse  # needed for extract_tags
# from <app>.models import Dict  # Django model holding the sentiment lexicon

def single_review_sentiment_score(pinglun_sent):
    # top-20 keywords of the comment, with their TF-IDF weights
    total = jieba.analyse.extract_tags(pinglun_sent, topK=20,
                                       withWeight=True, allowPOS=())
    seg_sent = tp.segmentation(pinglun_sent)  # segment into words
    i = 0          # position of the word currently scanned
    s = 0          # position just after the last sentiment word
    pos_count = 0  # positive score
    neg_count = 0  # negative score
    for each in seg_sent:  # analyse word by word
        try:
            # look the whole word up in the sentiment table
            # (the original passed each[0], i.e. only the first character)
            tmp = Dict.objects.get(word=each)
            if tmp.type == 'pos':        # positive sentiment word
                print(each, tmp.type)
                for w in total:          # add its TF-IDF weight if it is a keyword
                    if each == w[0]:
                        pos_count += w[1]
                pos_count += 1
                for w in seg_sent[s:i]:  # apply the degree adverbs before it
                    pos_count = match(w, pos_count)
                s = i + 1                # remember where the sentiment word was
            elif tmp.type == 'neg':      # negative sentiment word
                print(each, tmp.type)
                for w in total:
                    if each == w[0]:
                        neg_count += w[1]
                neg_count += 1
                for w in seg_sent[s:i]:
                    neg_count = match(w, neg_count)
                s = i + 1
        except Dict.DoesNotExist:
            print(each, 'not found')
        i += 1
    total_count = len(total)  # number of keywords; zero keywords would raise ZeroDivisionError
    print(pos_count, neg_count, total_count,
          float(pos_count - neg_count) / total_count)
    return float(pos_count - neg_count) / total_count
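# The Dict lookups above assume a Django model roughly like the one below.
# Field names are inferred from the usage (word, type); the actual schema of
# the original project is not shown in the source.

from django.db import models

class Dict(models.Model):
    word = models.CharField(max_length=64, unique=True)  # lexicon entry
    type = models.CharField(max_length=8)                # 'pos' or 'neg'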
# Class-based variant: degree adverbs multiply a prefix weight, sentiment
# words contribute a looked-up score, and a correction is applied when a key
# word appears in a negatively scored text.
def calculate_score(self, content):
    total_score = 0.0
    cut_contexts = tp.cut_sentence(content)  # one iteration per clause
    isKeyWord = False
    for cut_context in cut_contexts:
        words = tp.segmentation(cut_context)
        words = self.del_stop_words(words)  # drop stopwords
        score = 0.0
        prefix = 1.0
        for word in words:
            if word in self.inverse_dict:    # negation word
                prefix *= self.weight_inverse
            elif word in self.ish_dict:      # mild degree word
                prefix *= self.weight_ish
            elif word in self.more_dict:     # moderate degree word
                prefix *= self.weight_more
            elif word in self.very_dict:     # strong degree word
                prefix *= self.weight_very
            elif word in self.most_dict:     # extreme degree word
                prefix *= self.weight_most
            else:
                result, flag = self.word_search(word)
                if flag:
                    score += result
                if word in self.key_words:
                    isKeyWord = True
        score_final = prefix * score
        # optional fallback (disabled in the original): hand near-zero clauses
        # to SnowNLP
        # if -0.001 < score_final < 0.001:
        #     score_final = self.SnowNLP_analyze(cut_context)
        total_score += score_final
    if isKeyWord and (total_score < -0.01):
        total_score += self.weight_key_word
    return total_score
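# word_search() is not shown in the source. A plausible sketch: look the word
# up in a scored lexicon and report whether it was found. The attribute name
# scored_lexicon and the example scores are assumptions for illustration.

def word_search(self, word):
    score = self.scored_lexicon.get(word)  # e.g. {'上涨': 0.8, '下跌': -0.8}
    if score is None:
        return 0.0, False
    return score, True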
# Weibo variant with richer punctuation handling: '吗' and question marks can
# flip or nudge polarity, and repeated exclamation marks amplify the nearest
# sentiment word. The final score is clamped to [-10, 10].
def single_review_sentiment_score(weibo_sent):
    single_review_senti_score = []
    cuted_review = tp.cut_sentence(weibo_sent)
    for sent in cuted_review:
        seg_sent = tp.segmentation(sent)
        # (disabled in the original) skip clauses starting with '@', i.e. user mentions
        i = 0         # position of the word currently scanned
        s = 0         # position just after the last sentiment word
        poscount = 0  # positive score for this clause
        negcount = 0  # negative score for this clause
        for word in seg_sent:
            if word in posdict:
                poscount += 1
                for w in seg_sent[s:i]:
                    poscount = match(w, poscount)
                s = i + 1
            elif word in negdict:
                negcount += 1
                for w in seg_sent[s:i]:
                    negcount = match(w, negcount)
                s = i + 1
            elif word == "吗":
                # rhetorical question particle: flip the polarity of the
                # nearest sentiment word before it
                for w2 in seg_sent[::-1]:
                    if w2 in posdict:
                        poscount = poscount * -1
                        break
                    elif w2 in negdict:
                        negcount = negcount * -1
                        break
            elif word == "？" or word == "?":
                # question marks after this one reinforce the dominant
                # polarity; a question with no sentiment words is treated as
                # mildly negative
                if i + 1 < len(seg_sent):
                    for w2 in seg_sent[i + 1:]:
                        if w2 == "？" or w2 == "?":
                            if negcount > poscount:
                                negcount += 1
                            else:
                                poscount += 1
                if negcount == 0 and poscount == 0:
                    negcount += 1
            elif word == "！" or word == "!":
                # count the exclamation marks scanning backwards, then boost
                # the nearest sentiment word by 1.5 per mark (the original
                # compared `word` instead of `w2` in the second test, which
                # made it a no-op)
                m = 0
                for w2 in seg_sent[::-1]:
                    if w2 == "！" or w2 == "!":
                        m = m + 1
                    if w2 in posdict:
                        poscount = poscount + 1.5 * m
                        break
                    elif w2 in negdict:
                        negcount = negcount + 1.5 * m
                        break
            i += 1
        single_review_senti_score.append(
            transform_to_positive_num(poscount, negcount))
    # Alternative (disabled in the original): score each clause as
    # poscount - negcount and return the numpy mean of the non-zero clause
    # scores.
    pos_result, neg_result = 0, 0
    for res1, res2 in single_review_senti_score:
        pos_result += res1
        neg_result += res2
    result = pos_result - neg_result
    result = round(result, 2)
    # clamp to [-10, 10]
    if result > 10:
        result = 10
    elif result < -10:
        result = -10
    return result
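# Demo of the punctuation handling above; the sample texts and the lexicon
# contents are illustrative assumptions.
print(single_review_sentiment_score('这部电影真好看!!'))  # trailing '!' boosts the nearest sentiment word
print(single_review_sentiment_score('这也能叫好看吗?'))   # '吗' flips the polarity of the question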
# Standalone test script: load the sentiment and degree lexicons as jieba user
# dictionaries so multi-character sentiment words are kept intact, then
# tokenise a sample sentence. Raw strings keep the Windows paths literal.
import text_process as tp
import jieba

jieba.load_userdict(
    r'E:\postgraduate\no_space_environment\category\pycharm\pycharm_file_location\thesis\新闻文本处理\论文词典法CSDN\Sentiment_dict\emotion_dict\pos_all_dict.txt'
)
jieba.load_userdict(
    r'E:\postgraduate\no_space_environment\category\pycharm\pycharm_file_location\thesis\新闻文本处理\论文词典法CSDN\Sentiment_dict\emotion_dict\neg_all_dict.txt'
)
jieba.load_userdict(
    r'E:\postgraduate\no_space_environment\category\pycharm\pycharm_file_location\thesis\新闻文本处理\论文词典法CSDN\Sentiment_dict\degree_dict\insufficiently_inverse.txt'
)

news_sent = '我不是很看好这支股票会上涨。'
# cuted_review = tp.cut_sentence(news_sent)  # split into clauses, one analysis per clause
cuted_review = list(jieba.cut(news_sent))    # tokenise the whole sentence instead
print(cuted_review)
for sent in cuted_review:
    seg_sent = tp.segmentation(sent)          # segment each token again
    seg_sent = tp.del_stopwords(seg_sent)[:]  # drop stopwords
    print(seg_sent)