def test_neutral(to_stdout=True): """ neutral表示作者的话 :param to_stdout: :return: """ print 'test neutral' sentiment.load('../data/impurity_classifier') result_file = None if not to_stdout: result_file = codecs.open('../data/result.csv', 'w', encoding='gbk', errors='ignore') with codecs.open('../data/clean_neutral.txt', encoding='utf-8') as neutral_file: for line in neutral_file: line = line.strip() prob = sentiment.classify(line) if 0.8 > prob > 0.2: if to_stdout: print (line + ',' + str(prob > 0.5 and 1 or 0) + ',' + str(prob) + cur_linesep).encode('gbk') raw_input('press enter to continue') else: result_file.write(line + ',' + str(prob > 0.5 and 1 or 0) + ',' + str(prob) + cur_linesep) if not to_stdout: result_file.close()
def get_type_good_courses(self):
    """List well-rated courses of the course type asked about.

    Scans every course title in the graph, keeps those whose type matches
    the one extracted from the query and whose rating text classifies as
    positive enough (sentiment score >= 0.4), and formats a Chinese answer.
    """
    course_type = self.get_name("sst")
    # NOTE(review): Cypher is built by f-string interpolation; a title
    # containing a quote would break (or inject into) the query — prefer
    # driver parameters if the graph API supports them.
    all_titles = self.graph.run(f"match(c:Course) return c.title")
    good = []
    for course in all_titles:
        type_query = f"match(c:Course)-[] ->(ce:Course_Type) where c.title = '{course}' return ce.name"
        if self.graph.run(type_query)[0] != course_type:
            continue
        rating_query = f"match(c:Course)-[]->() where c.title = '{course}' return c.rating"
        rating = self.graph.run(rating_query)[0]
        score = sentiment.classify(rating)
        if score >= 0.4:
            print(course, score)
            good.append(course)
    if not good:
        return "该类型下没有对应的课程"
    return course_type + "评价不错的课程有: " + "、".join(good) + "等~"
def get_school_good_courses(self):
    """List well-rated courses of a given type offered by a given school.

    Fetches the school's courses, keeps those matching the requested course
    type whose rating text classifies as positive enough (score >= 0.4),
    and formats a Chinese answer.
    """
    school = self.get_name("ut")
    course_type = self.get_name("sst")
    # NOTE(review): f-string-built Cypher; quotes in names would break the
    # query — use driver parameters if available.
    offered = self.graph.run(f"match(s:School)-[] -> (c:Course) where s.name = '{school}' return c.title")
    picked = []
    for course in offered:
        type_query = f"match(c:Course)-[] ->(ce:Course_Type) where c.title = '{course}' return ce.name"
        if self.graph.run(type_query)[0] != course_type:
            continue
        rating_query = f"match (c:Course)-[]->() where c.title='{course}' return c.rating"
        rating = self.graph.run(rating_query)[0]
        score = sentiment.classify(rating)
        if score >= 0.4:
            print(course, score)
            picked.append(course)
    if not picked:
        return "该院系没有对应的课程"
    return school + "开设的" + "评价不错的" + course_type + "有: " + "、".join(picked) + "等~"
def classify(cases):
    """Classify each sentence in *cases* with the impurity classifier.

    Prints "<sentence>,<label>,<probability>" gbk-encoded for each case,
    where label is 1 when probability > 0.5 else 0.

    :param cases: iterable of sentences to score.
    :return: None
    """
    # Load the trained model once; the original reloaded it on every
    # loop iteration, which is loop-invariant work.
    sentiment.load('../data/impurity_classifier')
    for case in cases:
        case = clean_impurity(case)
        prob = sentiment.classify(case)
        label = 1 if prob > 0.5 else 0
        print (case + ',' + str(label) + ',' + str(prob) + cur_linesep).encode('gbk')
def emotionCalculate(self, name, sent, comment_score): # 情感分析,(已经确定name在句子str里面了)累加该评论中含有该名字的短句感情值 emotion = 0.0 num = 0 s_em = 0 for s in re.split(',|\.|;|,|。|;|!', sent): if s.find(name) != -1: em = sentiment.classify(sent.decode('utf8')) - 0.5 s_em += em num += 1 print s print "em: " + str(em) if num >= 1: emotion += s_em * 1.0 / num result = 0.5 * int(comment_score) + 5 * emotion print "0.5 * " + comment_score + ", 5 *" + str(emotion) + " => " + str(result) return result
def test_sentiment():
    """Smoke-test the trained impurity classifier on held-out files.

    Prints positive-set lines the model scores below 0.5 (i.e. apparent
    misclassifications). The negative-set sweep is currently disabled.
    """
    print 'test model'
    sentiment.load('../data/train_impurity_classifier')
    print 'test_negative'
    # Disabled: negative-set sweep (would flag lines scoring above 0.1).
    # with codecs.open('../data/test_negative.txt', encoding='utf-8') as negative_file:
    #     for line in negative_file:
    #         if sentiment.classify(line) > 0.1:
    #             print line,
    # NOTE(review): assumed to be live code pausing between the two
    # phases, not part of the commented block above — confirm.
    raw_input('press enter to continue')
    print 'test_positive'
    with codecs.open('../data/test_positive.txt', encoding='utf-8') as positive_file:
        for line in positive_file:
            # Positive samples scored < 0.5 are likely model errors.
            if sentiment.classify(line) < 0.5:
                print line,
def testEmotionModel(self, path): # 测试训练出来的情感模型,如果模型打分0.6一下,并且该评论分低于3分,认为正确 num = 0.0 with open(path) as file: lines = file.readlines() for line in lines: arr = line.split("\001") com = arr[5].replace("\"", "").replace("\n", "") score = int(arr[4]) / 10 em = sentiment.classify(com) print com print "score is " + str(score) + " emotion is " + str(em) if em < 0.6 and score < 3: num += 1.0 else: if em >= 0.6 and score >= 3: num += 1.0 return num
from snownlp import SnowNLP
from snownlp import sentiment
import jieba

# Load the custom dictionary so domain terms tokenize as single words.
jieba.load_userdict('words.txt')

sample = '信心'
clf = sentiment.Sentiment()
# Preprocess/tokenize the sample through the sentiment pipeline.
tokens = clf.handle(sample)
snow = SnowNLP('信心')
print(tokens)
polarity = snow.sentiments
print(polarity)

text = '原标题:市场信心不足,大盘反弹受阻'
print(sentiment.classify(text))
目前健保已通過治療藥物,可減緩肺功能下降近五成,急性惡化機率減少六成八,如持續治療,能降低死亡風險四成三;已有患者用藥後惡化情形趨緩,從無法活動到每日步行一點五公里,提高生活品質,引起全台胸腔內科醫師大規模搜查疑似病例,欲揪出潛在患者。 張時杰表示,初步篩檢可透過公司企業或自費進行胸部X光合併吹氣肺功能檢查,後續再以臨床診斷、高解析度電腦斷層確診。 菜瓜布肺的危險因子,包含逾五十歲、直系家人或近親曾有病史、吸菸、暴露於化學工廠環境作業員等風險較高。 ''' from snownlp import normal from snownlp import seg from snownlp import sentiment from snownlp.summary import textrank if __name__ == '__main__': sents = normal.get_sentences(text) doc = [] #summary for sent in sents: words = seg.seg(sent) words = normal.filter_stop(words) doc.append(words) rank = textrank.TextRank(doc) rank.solve() for index in rank.top_index(5): print(sents[index]) #probability of the sentiment pro = sentiment.classify(text) print(pro)