# Dictionary-based sentiment scoring: several implementations collected from
# different projects. All of them rely on a text_process module (tp) and on
# sentiment lexicons (posdict/negdict) loaded at module level (not shown in
# the source).
import text_process as tp

def sentence_score(sentence):
    final_score = []
    cuted_review = tp.cut_sentence(sentence)  # cut the text into sub-sentences
    for sent in cuted_review:
        seg_sent = tp.segmentation(sent)          # segment into words
        seg_sent = tp.del_stopwords(seg_sent)[:]  # drop stopwords
        i = 0         # position of the word currently scanned
        s = 0         # position just after the last sentiment word
        poscount = 0  # positive score for this sub-sentence
        negcount = 0  # negative score for this sub-sentence
        for word in seg_sent:
            if word in posdict:
                poscount += 1
                # weight the score by the degree adverbs between the previous
                # sentiment word and this one
                for w in seg_sent[s:i]:
                    poscount = match(w, poscount)
                s = i + 1
            elif word in negdict:
                negcount += 1
                for w in seg_sent[s:i]:
                    negcount = match(w, negcount)
                s = i + 1
            # an exclamation mark (full- or half-width) marks the end of the
            # clause: scan backwards for the nearest sentiment word and add 2
            elif word == "！" or word == "!":
                for w2 in seg_sent[::-1]:
                    if w2 in posdict:
                        poscount += 2
                        break
                    elif w2 in negdict:
                        negcount += 2
                        break
            i += 1
        final_score.append(transform_to_positive_num(poscount, negcount))
    # final processing: accumulate the scores of every sub-sentence
    pos_result, neg_result = 0, 0
    for res1, res2 in final_score:
        pos_result += res1
        neg_result += res2
    result = pos_result - neg_result  # final score
    return result
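# The helpers used above -- match() and transform_to_positive_num() -- are not
# shown in the source. A minimal sketch follows, assuming a HowNet-style degree
# lexicon; the word lists and weights here are illustrative assumptions, not
# the original project's values.

mostdict = {'最', '极其', '非常'}  # assumed extreme-degree words, weight 2.0
inversedict = {'不', '没', '无'}   # assumed negation words, weight -1.0

def match(word, sentiment_value):
    # scale the running sentiment score by the degree word in front of it
    if word in mostdict:
        sentiment_value *= 2.0
    elif word in inversedict:
        sentiment_value *= -1.0
    return sentiment_value

def transform_to_positive_num(poscount, negcount):
    # fold negative partial scores into the opposite polarity so that the
    # returned (positive, negative) pair is non-negative
    if poscount < 0 and negcount >= 0:
        negcount += abs(poscount)
        poscount = 0
    elif negcount < 0 and poscount >= 0:
        poscount += abs(negcount)
        negcount = 0
    elif poscount < 0 and negcount < 0:
        poscount, negcount = abs(negcount), abs(poscount)
    return poscount, negcount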
# Variant for review comments: the same clause-level scoring as
# sentence_score, but the final score is rounded to one decimal place.
def single_review_sentiment_score(comment_sent):
    single_review_senti_score = []
    cuted_review = tp.cut_sentence(comment_sent)  # split into clauses, analysed one by one
    for sent in cuted_review:
        seg_sent = tp.segmentation(sent)          # segment into words
        seg_sent = tp.del_stopwords(seg_sent)[:]  # drop stopwords
        i = 0         # position of the word currently scanned
        s = 0         # position just after the last sentiment word
        poscount = 0  # positive score for this clause
        negcount = 0  # negative score for this clause
        for word in seg_sent:                     # analyse word by word
            if word in posdict:                   # positive sentiment word
                poscount += 1
                for w in seg_sent[s:i]:           # apply the degree adverbs before it
                    poscount = match(w, poscount)
                s = i + 1                         # remember where the sentiment word was
            elif word in negdict:                 # negative sentiment word
                negcount += 1
                for w in seg_sent[s:i]:
                    negcount = match(w, negcount)
                s = i + 1
            # an exclamation mark marks the end of the clause: scan backwards
            # for the nearest sentiment word, add 2, then stop
            elif word == "！" or word == "!":
                for w2 in seg_sent[::-1]:
                    if w2 in posdict:
                        poscount += 2
                        break
                    elif w2 in negdict:
                        negcount += 2
                        break
            i += 1
        single_review_senti_score.append(
            transform_to_positive_num(poscount, negcount))
    # final processing: accumulate positive and negative totals over all clauses
    pos_result, neg_result = 0, 0
    for res1, res2 in single_review_senti_score:
        pos_result += res1
        neg_result += res2
    result = pos_result - neg_result  # final sentiment score of the comment
    result = round(result, 1)
    return result
# A weibo version of single_review_sentiment_score appeared here; it was
# identical to the function above except that the argument was a weibo post
# (weibo_sent) instead of a review comment.
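# Usage sketch for the clause-level scorer above; the sample sentence and the
# lexicon contents are illustrative assumptions.
sample = '这家店的服务非常好,强烈推荐!'
print(single_review_sentiment_score(sample))  # a positive review should score > 0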
# Django-backed variant: sentiment words live in a database table (Dict) and
# jieba's TF-IDF keyword weights amplify matched sentiment words.
import jieba
import jieba.analyse  # needed for extract_tags
# from <app>.models import Dict  # Django model holding the sentiment lexicon

def single_review_sentiment_score(pinglun_sent):
    # top-20 keywords of the comment, with their TF-IDF weights
    total = jieba.analyse.extract_tags(pinglun_sent, topK=20,
                                       withWeight=True, allowPOS=())
    seg_sent = tp.segmentation(pinglun_sent)  # segment into words
    i = 0          # position of the word currently scanned
    s = 0          # position just after the last sentiment word
    pos_count = 0  # positive score
    neg_count = 0  # negative score
    for each in seg_sent:  # analyse word by word
        try:
            # look the whole word up in the sentiment table
            # (the original passed each[0], i.e. only the first character)
            tmp = Dict.objects.get(word=each)
            if tmp.type == 'pos':        # positive sentiment word
                print(each, tmp.type)
                for w in total:          # add its TF-IDF weight if it is a keyword
                    if each == w[0]:
                        pos_count += w[1]
                pos_count += 1
                for w in seg_sent[s:i]:  # apply the degree adverbs before it
                    pos_count = match(w, pos_count)
                s = i + 1                # remember where the sentiment word was
            elif tmp.type == 'neg':      # negative sentiment word
                print(each, tmp.type)
                for w in total:
                    if each == w[0]:
                        neg_count += w[1]
                neg_count += 1
                for w in seg_sent[s:i]:
                    neg_count = match(w, neg_count)
                s = i + 1
        except Dict.DoesNotExist:
            print(each, 'not found')
        i += 1
    total_count = len(total)  # number of keywords; zero keywords would raise ZeroDivisionError
    print(pos_count, neg_count, total_count,
          float(pos_count - neg_count) / total_count)
    return float(pos_count - neg_count) / total_count
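# The Dict lookups above assume a Django model roughly like the one below.
# Field names are inferred from the usage (word, type); the actual schema of
# the original project is not shown in the source.

from django.db import models

class Dict(models.Model):
    word = models.CharField(max_length=64, unique=True)  # lexicon entry
    type = models.CharField(max_length=8)                # 'pos' or 'neg'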
# Class-based variant: degree adverbs multiply a prefix weight, sentiment
# words contribute a looked-up score, and a correction is applied when a key
# word appears in a negatively scored text.
def calculate_score(self, content):
    total_score = 0.0
    cut_contexts = tp.cut_sentence(content)  # one iteration per clause
    isKeyWord = False
    for cut_context in cut_contexts:
        words = tp.segmentation(cut_context)
        words = self.del_stop_words(words)  # drop stopwords
        score = 0.0
        prefix = 1.0
        for word in words:
            if word in self.inverse_dict:    # negation word
                prefix *= self.weight_inverse
            elif word in self.ish_dict:      # mild degree word
                prefix *= self.weight_ish
            elif word in self.more_dict:     # moderate degree word
                prefix *= self.weight_more
            elif word in self.very_dict:     # strong degree word
                prefix *= self.weight_very
            elif word in self.most_dict:     # extreme degree word
                prefix *= self.weight_most
            else:
                result, flag = self.word_search(word)
                if flag:
                    score += result
                if word in self.key_words:
                    isKeyWord = True
        score_final = prefix * score
        # optional fallback (disabled in the original): hand near-zero clauses
        # to SnowNLP
        # if -0.001 < score_final < 0.001:
        #     score_final = self.SnowNLP_analyze(cut_context)
        total_score += score_final
    if isKeyWord and (total_score < -0.01):
        total_score += self.weight_key_word
    return total_score
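# word_search() is not shown in the source. A plausible sketch: look the word
# up in a scored lexicon and report whether it was found. The attribute name
# scored_lexicon and the example scores are assumptions for illustration.

def word_search(self, word):
    score = self.scored_lexicon.get(word)  # e.g. {'上涨': 0.8, '下跌': -0.8}
    if score is None:
        return 0.0, False
    return score, True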
# Weibo variant with richer punctuation handling: '吗' and question marks can
# flip or nudge polarity, and repeated exclamation marks amplify the nearest
# sentiment word. The final score is clamped to [-10, 10].
def single_review_sentiment_score(weibo_sent):
    single_review_senti_score = []
    cuted_review = tp.cut_sentence(weibo_sent)
    for sent in cuted_review:
        seg_sent = tp.segmentation(sent)
        # (disabled in the original) skip clauses starting with '@', i.e. user mentions
        i = 0         # position of the word currently scanned
        s = 0         # position just after the last sentiment word
        poscount = 0  # positive score for this clause
        negcount = 0  # negative score for this clause
        for word in seg_sent:
            if word in posdict:
                poscount += 1
                for w in seg_sent[s:i]:
                    poscount = match(w, poscount)
                s = i + 1
            elif word in negdict:
                negcount += 1
                for w in seg_sent[s:i]:
                    negcount = match(w, negcount)
                s = i + 1
            elif word == "吗":
                # rhetorical question particle: flip the polarity of the
                # nearest sentiment word before it
                for w2 in seg_sent[::-1]:
                    if w2 in posdict:
                        poscount = poscount * -1
                        break
                    elif w2 in negdict:
                        negcount = negcount * -1
                        break
            elif word == "？" or word == "?":
                # question marks after this one reinforce the dominant
                # polarity; a question with no sentiment words is treated as
                # mildly negative
                if i + 1 < len(seg_sent):
                    for w2 in seg_sent[i + 1:]:
                        if w2 == "？" or w2 == "?":
                            if negcount > poscount:
                                negcount += 1
                            else:
                                poscount += 1
                if negcount == 0 and poscount == 0:
                    negcount += 1
            elif word == "！" or word == "!":
                # count the exclamation marks scanning backwards, then boost
                # the nearest sentiment word by 1.5 per mark (the original
                # compared `word` instead of `w2` in the second test, which
                # made it a no-op)
                m = 0
                for w2 in seg_sent[::-1]:
                    if w2 == "！" or w2 == "!":
                        m = m + 1
                    if w2 in posdict:
                        poscount = poscount + 1.5 * m
                        break
                    elif w2 in negdict:
                        negcount = negcount + 1.5 * m
                        break
            i += 1
        single_review_senti_score.append(
            transform_to_positive_num(poscount, negcount))
    # Alternative (disabled in the original): score each clause as
    # poscount - negcount and return the numpy mean of the non-zero clause
    # scores.
    pos_result, neg_result = 0, 0
    for res1, res2 in single_review_senti_score:
        pos_result += res1
        neg_result += res2
    result = pos_result - neg_result
    result = round(result, 2)
    # clamp to [-10, 10]
    if result > 10:
        result = 10
    elif result < -10:
        result = -10
    return result
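# Demo of the punctuation handling above; the sample texts and the lexicon
# contents are illustrative assumptions.
print(single_review_sentiment_score('这部电影真好看!!'))  # trailing '!' boosts the nearest sentiment word
print(single_review_sentiment_score('这也能叫好看吗?'))   # '吗' flips the polarity of the question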
# Standalone test script: load the sentiment and degree lexicons as jieba user
# dictionaries so multi-character sentiment words are kept intact, then
# tokenise a sample sentence. Raw strings keep the Windows paths literal.
import text_process as tp
import jieba

jieba.load_userdict(
    r'E:\postgraduate\no_space_environment\category\pycharm\pycharm_file_location\thesis\新闻文本处理\论文词典法CSDN\Sentiment_dict\emotion_dict\pos_all_dict.txt'
)
jieba.load_userdict(
    r'E:\postgraduate\no_space_environment\category\pycharm\pycharm_file_location\thesis\新闻文本处理\论文词典法CSDN\Sentiment_dict\emotion_dict\neg_all_dict.txt'
)
jieba.load_userdict(
    r'E:\postgraduate\no_space_environment\category\pycharm\pycharm_file_location\thesis\新闻文本处理\论文词典法CSDN\Sentiment_dict\degree_dict\insufficiently_inverse.txt'
)

news_sent = '我不是很看好这支股票会上涨。'
# cuted_review = tp.cut_sentence(news_sent)  # split into clauses, one analysis per clause
cuted_review = list(jieba.cut(news_sent))    # tokenise the whole sentence instead
print(cuted_review)
for sent in cuted_review:
    seg_sent = tp.segmentation(sent)          # segment each token again
    seg_sent = tp.del_stopwords(seg_sent)[:]  # drop stopwords
    print(seg_sent)