예제 #1
0
def test_neutral(to_stdout=True):
    """
    Inspect classifier output on "neutral" sentences ("neutral" here marks
    the author's own narration, per the original note).

    NOTE: Python 2 code (print statements, raw_input).
    :param to_stdout: when True, print each uncertain result and pause for
                      Enter; when False, write results to ../data/result.csv
                      encoded as GBK.
    :return: None
    """
    print 'test neutral'

    # Load the pre-trained impurity classifier model.
    sentiment.load('../data/impurity_classifier')
    result_file = None
    if not to_stdout:
        # GBK output (e.g. for Excel on a Chinese-locale Windows);
        # characters that cannot be encoded are silently dropped.
        result_file = codecs.open('../data/result.csv', 'w', encoding='gbk', errors='ignore')

    with codecs.open('../data/clean_neutral.txt', encoding='utf-8') as neutral_file:
        for line in neutral_file:
            line = line.strip()
            prob = sentiment.classify(line)
            # Only report uncertain predictions: probability strictly
            # between 0.2 and 0.8.
            if 0.8 > prob > 0.2:
                if to_stdout:
                    # `prob > 0.5 and 1 or 0` is the pre-ternary idiom for int(prob > 0.5).
                    print (line + ',' + str(prob > 0.5 and 1 or 0) + ',' + str(prob) + cur_linesep).encode('gbk')
                    raw_input('press enter to continue')
                else:
                    result_file.write(line + ',' + str(prob > 0.5 and 1 or 0) + ',' + str(prob) + cur_linesep)

    if not to_stdout:
        result_file.close()
예제 #2
0
    def get_type_good_courses(self):
        """Return well-rated courses of the requested course type.

        Fetches every course title from the graph, keeps those whose type
        matches the extracted type entity ("sst"), and filters by the
        sentiment score of the course's rating text (threshold 0.4).
        """
        course_type = self.get_name("sst")
        ans = self.graph.run(f"match(c:Course) return c.title")

        results = []
        for course in ans:
            # NOTE(review): values are interpolated into Cypher directly —
            # injection-prone; confirm inputs are trusted.
            type_ = self.graph.run(
                f"match(c:Course)-[] ->(ce:Course_Type) where c.title = '{course}' return ce.name"
            )[0]
            if type_ != course_type:
                continue
            rating = self.graph.run(
                f"match(c:Course)-[]->() where c.title = '{course}' return c.rating"
            )[0]
            score = sentiment.classify(rating)
            if score >= 0.4:
                print(course, score)
                results.append(course)

        if not results:
            return "该类型下没有对应的课程"
        res_ = "、".join(results)
        return course_type + "评价不错的课程有: " + res_ + "等~"
예제 #3
0
    def get_school_good_courses(self):
        """Return well-rated courses of a given type offered by a school.

        Looks up the school ("ut") and course-type ("sst") entities, pulls
        the school's courses from the graph, and keeps those of the matching
        type whose rating text scores >= 0.4 on sentiment.
        """
        school = self.get_name("ut")
        course_type = self.get_name("sst")

        courses = self.graph.run(
            f"match(s:School)-[] -> (c:Course) where s.name = '{school}' return c.title"
        )

        results = []
        for course in courses:
            type_ = self.graph.run(
                f"match(c:Course)-[] ->(ce:Course_Type) where c.title = '{course}' return ce.name"
            )[0]
            if type_ != course_type:
                continue
            rating = self.graph.run(
                f"match (c:Course)-[]->() where c.title='{course}' return c.rating"
            )[0]
            score = sentiment.classify(rating)
            if score >= 0.4:
                print(course, score)
                results.append(course)

        if not results:
            return "该院系没有对应的课程"
        res = "、".join(results)
        return school + "开设的" + "评价不错的" + course_type + "有: " + res + "等~"
예제 #4
0
def classify(cases):
    """
    Classify each sentence in `cases` and print "<sentence>,<label>,<prob>".

    NOTE: Python 2 code (print statement, .encode on str).
    :param cases: iterable of sentences to score
    :return: None (prints one GBK-encoded CSV line per case)
    """
    # FIX: load the model once, outside the loop — it was previously
    # re-loaded on every iteration, which is pure loop-invariant overhead.
    sentiment.load('../data/impurity_classifier')
    for case in cases:
        case = clean_impurity(case)
        prob = sentiment.classify(case)
        # `prob > 0.5 and 1 or 0` is the pre-ternary idiom for int(prob > 0.5);
        # the line is GBK-encoded for console display.
        print (case + ',' + str(prob > 0.5 and 1 or 0) + ',' + str(prob) + cur_linesep).encode('gbk')
예제 #5
0
 def emotionCalculate(self, name, sent, comment_score):
     # 情感分析,(已经确定name在句子str里面了)累加该评论中含有该名字的短句感情值
     emotion = 0.0
     num = 0
     s_em = 0
     for s in re.split(',|\.|;|,|。|;|!', sent):
         if s.find(name) != -1:
             em = sentiment.classify(sent.decode('utf8')) - 0.5
             s_em += em
             num += 1
             print s
             print "em: " + str(em)
     if num >= 1:
         emotion += s_em * 1.0 / num
     result = 0.5 * int(comment_score) + 5 * emotion
     print "0.5 * " + comment_score + ", 5 *" + str(emotion) + " => " + str(result)
     return result
예제 #6
0
def test_sentiment():
    """
    Smoke-test the trained impurity classifier against labelled test files.

    NOTE: Python 2 code (print statements, raw_input).
    :return: None (prints suspect lines to stdout)
    """
    print 'test model'

    sentiment.load('../data/train_impurity_classifier')

    print 'test_negative'
    # with codecs.open('../data/test_negative.txt', encoding='utf-8') as negative_file:
    #     for line in negative_file:
    #         if sentiment.classify(line) > 0.1:
    #             print line,

    raw_input('press enter to continue')
    print 'test_positive'
    # Flag positive-labelled lines the model scores below 0.5 — likely
    # misclassifications worth inspecting.
    with codecs.open('../data/test_positive.txt', encoding='utf-8') as positive_file:
        for line in positive_file:
            if sentiment.classify(line) < 0.5:
                print line,
예제 #7
0
    def testEmotionModel(self, path):
        # Evaluate the trained sentiment model against labelled comments:
        # a prediction counts as correct when model score and human score
        # agree across the thresholds — model < 0.6 with human score < 3,
        # or model >= 0.6 with human score >= 3.
        # NOTE: Python 2 code (print statements).
        # :param path: \x01-separated data file; field 4 is the raw score
        #              (divided by 10), field 5 the comment text.
        # :return: count of correct predictions (float)
        num = 0.0
        with open(path) as file:
            lines = file.readlines()
            for line in lines:
                arr = line.split("\001")
                com = arr[5].replace("\"", "").replace("\n", "")
                # NOTE(review): Python 2 integer division — the fractional
                # part of arr[4]/10 is dropped; confirm this is intended.
                score = int(arr[4]) / 10
                em = sentiment.classify(com)
                print com
                print "score is " + str(score) + " emotion is " + str(em)
                if em < 0.6 and score < 3:
                    num += 1.0
                else:
                    if em >= 0.6 and score >= 3:
                        num += 1.0

        return num
예제 #8
0
from snownlp import SnowNLP
from snownlp import sentiment
import jieba

# Load a custom user dictionary so jieba segments domain terms as intended.
jieba.load_userdict('words.txt')
# string='原标题:市场信心不足,大盘反弹受阻'
string = '信心'

# Tokenize the sample text through snownlp's Sentiment preprocessing.
sent = sentiment.Sentiment()
words_list = sentiment.Sentiment.handle(sent, string)
# score=SnowNLP('原标题:市场信心不足,大盘反弹受阻')
score = SnowNLP('信心')

print(words_list)
# Sentiment polarity of the SnowNLP object (value in [0, 1]).
s = score.sentiments

# s=sentiment.Sentiment.classify(sent,'原标题:市场信心不足,大盘反弹受阻')
print(s)
text = '原标题:市场信心不足,大盘反弹受阻'
print(sentiment.classify(text))
# # Set up the jieba custom word dictionary
# import jieba
# jieba.load_userdict('words.txt') # user-prepared dictionary of common words
# # lcut with the optimized dictionary
# a=jieba.lcut('需要分词的文本,市场信心不足')
# a=SnowNLP(a)
# print(a.sentiments)
# # print(list(a))
目前健保已通過治療藥物,可減緩肺功能下降近五成,急性惡化機率減少六成八,如持續治療,能降低死亡風險四成三;已有患者用藥後惡化情形趨緩,從無法活動到每日步行一點五公里,提高生活品質,引起全台胸腔內科醫師大規模搜查疑似病例,欲揪出潛在患者。

張時杰表示,初步篩檢可透過公司企業或自費進行胸部X光合併吹氣肺功能檢查,後續再以臨床診斷、高解析度電腦斷層確診。

菜瓜布肺的危險因子,包含逾五十歲、直系家人或近親曾有病史、吸菸、暴露於化學工廠環境作業員等風險較高。
'''

from snownlp import normal
from snownlp import seg
from snownlp import sentiment
from snownlp.summary import textrank

if __name__ == '__main__':

    # Split the source text into sentences; reused for both the summary
    # and (indirectly) the sentiment step below.
    sents = normal.get_sentences(text)

    # Build the TextRank document: one stop-word-filtered token list
    # per sentence.
    doc = [normal.filter_stop(seg.seg(sentence)) for sentence in sents]

    ranker = textrank.TextRank(doc)
    ranker.solve()
    # Print the five highest-ranked sentences as the summary.
    for idx in ranker.top_index(5):
        print(sents[idx])

    # Probability that the overall sentiment of the text is positive.
    print(sentiment.classify(text))