コード例 #1
0
def fenci1(comments, storePath, filter=False):
    """Segment every comment and append the results to ``storePath``.

    Each comment is segmented with ``tp.segmentation``, printed, and written
    as one UTF-8 line to ``storePath`` (opened in append mode).

    Args:
        comments: Iterable of comment texts to segment.
        storePath: Path of the output file; new lines are appended to it.
        filter: When truthy, words found in the stop-word list and words
            equal to a single space are dropped, and the remaining words are
            joined with single spaces. When falsy, the string form returned
            by ``tp.segmentation(comment, 'str')`` is written unchanged.
    """
    # Build the path from components instead of a backslash-laden literal:
    # the old form relied on invalid escape sequences and was Windows-only.
    stopwords_path = os.path.join(os.getcwd(), "Dictionary", "stopword",
                                  "stopwords.txt")
    stopwords = tp.loadTextWords(stopwords_path)

    # Open the output once for the whole batch rather than once per comment.
    # NOTE: the file is therefore created even when ``comments`` is empty.
    with codecs.open(storePath, 'a+', 'utf-8') as f:
        for comment in comments:
            if filter:
                kept_words = [
                    word for word in tp.segmentation(comment, 'list')
                    if word not in stopwords and word != ' '
                ]
                seg_result = " ".join(kept_words)
            else:
                seg_result = tp.segmentation(comment, 'str')
            print(seg_result)
            f.write(seg_result + "\n")
コード例 #2
0
 def cut_sentences_words(self, review):
     """Split ``review`` into sentences and segment each sentence.

     The review is cut into sentences with ``tp.cut_sentence_2`` and every
     sentence is passed to ``tp.segmentation(sentence, 'list')``.
     Stop-word filtering is not applied here.

     Returns:
         A list with one segmentation result per sentence, in sentence
         order.
     """
     return [
         tp.segmentation(sentence, 'list')
         for sentence in tp.cut_sentence_2(review)
     ]
コード例 #3
0
def get_single_sent_count(cuted_sents):
    """Compute a sentiment score for every sentence in ``cuted_sents``.

    Each sentence is segmented with ``tp.segmentation(sent, 'list')`` and
    scanned word by word:

    * a word in ``posdict`` / ``negdict`` adds 1 to the positive / negative
      count, after which every word between the previous sentiment word and
      this one is fed through ``match`` to adjust that count;
    * an exclamation mark adds 2 to the polarity of the first sentiment word
      found when scanning the sentence from its end.

    Args:
        cuted_sents: Iterable of sentences (already split from a review).

    Returns:
        A list holding ``transform_to_positive_num(poscount, negcount)`` for
        each sentence, in input order.
    """
    # Compare text with text. The previous ``"!".decode('utf8')`` only works
    # on Python 2 (str has no .decode on Python 3), and it tested the same
    # literal twice. NOTE(review): the duplicated test suggests the
    # full-width mark (U+FF01) was the intended second form - confirm.
    exclamation_marks = (u"!", u"\uff01")

    single_review_senti_score = []
    for sent in cuted_sents:
        seg_sent = tp.segmentation(sent, 'list')
        window_start = 0  # index just past the previous sentiment word
        poscount = 0  # positive score of this sentence
        negcount = 0  # negative score of this sentence

        for position, word in enumerate(seg_sent):
            if word in posdict:
                poscount += 1
                # Per the original note, match() expresses degree: apply it
                # to the words preceding this sentiment word.
                for w in seg_sent[window_start:position]:
                    poscount = match(w, poscount)
                window_start = position + 1

            elif word in negdict:
                negcount += 1
                for w in seg_sent[window_start:position]:
                    negcount = match(w, negcount)
                window_start = position + 1

            # Every "!" gives +2 to the first sentiment word found when
            # scanning the whole sentence backwards.
            elif word in exclamation_marks:
                for w2 in seg_sent[::-1]:
                    if w2 in posdict:
                        poscount += 2
                        break
                    elif w2 in negdict:
                        negcount += 2
                        break

        single_review_senti_score.append(
            transform_to_positive_num(poscount, negcount))
    return single_review_senti_score
コード例 #4
0
def single_review_sentiment_score(weibo_sent):
    """Score the sentiment of one text and derive its overall tone.

    The text is split into sentences with ``tp.cut_sentence``. Each sentence
    is segmented, stripped of stop words with ``tp.del_stopwords`` and
    scanned word by word:

    * a word in ``posdict`` / ``negdict`` adds 1 to the positive / negative
      score, after which every word between the previous sentiment word and
      this one is fed through ``match`` to adjust that score;
    * an exclamation mark adds 1 to the polarity of the first sentiment word
      found when scanning the sentence from its end.

    Per-sentence scores are normalised by ``transform_to_positive_num`` and
    summed over all sentences.

    Args:
        weibo_sent: The text to analyse.

    Returns:
        A 4-tuple ``(pos_result, neg_result, tone, su)`` where
        ``pos_result`` / ``neg_result`` are the summed positive / negative
        scores, ``tone`` is ``(pos - neg) / (pos + neg)`` rounded to three
        decimals (``0`` when the denominator is zero; the original note
        attributes this formula to the "Lin Le" model), and ``su`` is the
        number of words scanned after stop-word removal.
    """
    # Compare text with text. The previous ``word.encode('UTF-8') == "!"``
    # compared bytes with str on Python 3 and therefore never matched.
    # NOTE(review): the old code tested the same literal twice, which
    # suggests the full-width mark (U+FF01) was the intended second form.
    exclamation_marks = (u"!", u"\uff01")

    single_review_senti_score = []
    su = 0  # total number of words scanned across all sentences
    # Analyse every sentence on its own.
    for sent in tp.cut_sentence(weibo_sent):
        seg_sent = tp.del_stopwords(tp.segmentation(sent))[:]
        su += len(seg_sent)

        window_start = 0  # index just past the previous sentiment word
        poscount = 0  # positive score of this sentence
        negcount = 0  # negative score of this sentence

        for position, word in enumerate(seg_sent):
            if word in posdict:
                poscount += 1
                for w in seg_sent[window_start:position]:
                    poscount = match(w, poscount)
                window_start = position + 1

            elif word in negdict:
                negcount += 1
                for w in seg_sent[window_start:position]:
                    negcount = match(w, negcount)
                window_start = position + 1

            elif word in exclamation_marks:
                # Scan backwards for the nearest sentiment word from the end
                # of the sentence, give it +1, then stop.
                for w2 in seg_sent[::-1]:
                    if w2 in posdict:
                        poscount += 1
                        break
                    elif w2 in negdict:
                        negcount += 1
                        break

        # Final per-sentence normalisation of the two scores.
        single_review_senti_score.append(
            transform_to_positive_num(poscount, negcount))

    # Accumulate the per-sentence scores into totals for the whole text.
    pos_result, neg_result = 0, 0
    for sent_pos, sent_neg in single_review_senti_score:
        pos_result += sent_pos
        neg_result += sent_neg

    score_total = pos_result + neg_result
    if score_total == 0:
        # No sentiment at all: avoid dividing by zero, report neutral tone.
        tone = 0
    else:
        # float() keeps this a true division on Python 2 as well.
        tone = round(float(pos_result - neg_result) / score_total, 3)

    return pos_result, neg_result, tone, su