예제 #1
0
def calculate(news_sent):
    """Score the sentiment of one news sentence.

    The sentence is segmented with jieba, stopwords are removed, and the
    token/sentence matchers accumulate weights that are multiplied into a
    raw score.  A rule table of word co-occurrences then corrects the
    result for domain-specific cases (e.g. "costs fell" is good news even
    though "fell" is a negative word).

    :param news_sent: raw news sentence (str).
    :return: numeric sentiment score; 0 when neutral or hypothetical.
    """
    cuted_review = list(jieba.cut(news_sent))  # segment the sentence into tokens
    cuted_review = tp.del_stopwords(cuted_review)
    print(cuted_review)
    count_list = []
    sent_list = []
    for sent in cuted_review:
        count_list = match(sent, count_list)
        sent_list = sent_match(sent, sent_list)

    score = mutiplication_list(count_list, sent_list)
    if score == 0:
        return score

    # Set gives O(1) membership instead of re-scanning the token list
    # once per `in` test in the original chain.
    words = set(cuted_review)

    def any_in(*tokens):
        # True when at least one token survived segmentation/stopword removal.
        return any(t in words for t in tokens)

    # Hypothetical / concessive conjunctions: the statement is conditional,
    # so it carries no realized sentiment.
    if any_in('只要', '虽然', '虽说', '如果', '即使', '即便',
              '不要说只有', '即便真的有', '除非', '否则', '若'):
        return 0

    # (left tokens, right tokens, multiplier): the first rule whose two
    # groups both appear decides the correction.  Order is significant and
    # mirrors the original elif chain; the duplicated '成本' entry from the
    # original first rule has been deduplicated (no behavior change).
    rules = (
        (('下降', '降低', '增加'),
         ('费用', '成本', '财务费用', '财务成本', '风险', '进口成本'), -1),
        (('房价',), ('下降', '下跌'), -1),
        (('现货价格', '油价'), ('上涨', '大涨'), -1),
        (('下降', '回落', '上行', '下行'),
         ('CPI', '通胀', '商品价格', '物价', '原油', '利率', '成本'), -1),
        (('抵御住了',), ('冲击',), -1),
        (('下跌',), ('反弹',), -1),
        (('损失',), ('减少', '最小化'), -1),
        (('下跌风险', '风险'), ('控制',), -1),
        (('下降',), ('比例',), 0),   # a falling ratio is treated as neutral
        (('压力',), ('缓解',), -1),
        (('至少要到',), ('回升',), -1),
        (('增强',), ('忧虑',), -1),
        (('下调', '下降'),
         ('存准率', '存款准备金率', '基准利率', 'CPI'), -1),
        (('扑灭',), ('希望',), -1),
    )
    for left, right, factor in rules:
        if any_in(*left) and any_in(*right):
            return score * factor
    return score
예제 #2
0
def calculate(news_sent):
    """Score the sentiment of one news sentence.

    Segments the sentence with jieba, strips stopwords, accumulates the
    per-token matcher weights, and multiplies them into the final score.

    :param news_sent: raw news sentence (str).
    :return: numeric sentiment score.
    """
    # NOTE: the unused `single_review_senti_score` local from the original
    # has been removed; nothing was ever appended to it.
    cuted_review = list(jieba.cut(news_sent))  # segment the sentence into tokens
    cuted_review = tp.del_stopwords(cuted_review)
    count_list = []
    for sent in cuted_review:
        count_list = match(sent, count_list)
    return mutiplication_list(count_list)
def sentence_score(sentence):
    """Compute a net sentiment score for *sentence*.

    The text is cut into sub-sentences; inside each one, every word found
    in posdict/negdict scores 1 point, the words between the previous and
    the current sentiment word adjust that weight via match() (negation,
    degree adverbs, ...), and a trailing exclamation mark adds 2 to the
    nearest preceding sentiment word.  The per-clause (pos, neg) pairs are
    summed and the result is pos - neg.

    :param sentence: raw input text (str).
    :return: net sentiment score (pos total minus neg total).
    """
    final_score = []
    cuted_review = tp.cut_sentence(sentence)  # cut sentence into subsentences
    for sent in cuted_review:
        seg_sent = tp.segmentation(sent)   # segment words
        seg_sent = tp.del_stopwords(seg_sent)[:]
        i = 0    # current word location
        s = 0    # position just past the last sentiment word
        poscount = 0    # positive word score
        negcount = 0    # negative word score

        for word in seg_sent:
            if word in posdict:
                poscount += 1
                # modifiers between the previous sentiment word and this
                # one adjust the weight
                for w in seg_sent[s:i]:
                    poscount = match(w, poscount)
                s = i + 1

            elif word in negdict:
                negcount += 1
                for w in seg_sent[s:i]:
                    negcount = match(w, negcount)
                s = i + 1

            # An exclamation mark (ASCII or full-width) ends the clause:
            # boost the nearest preceding sentiment word by 2.
            # BUG FIX: the original compared against "!".decode("utf-8"),
            # which raises AttributeError on Python 3 (str has no .decode)
            # for every word that is neither positive nor negative.
            elif word == "!" or word == "!":
                for w2 in seg_sent[::-1]:
                    if w2 in posdict:
                        poscount += 2
                        break
                    elif w2 in negdict:
                        negcount += 2
                        break
            i += 1

        final_score.append(transform_to_positive_num(poscount, negcount))   # final process
    pos_result, neg_result = 0, 0
    for res1, res2 in final_score:  # accumulate over clauses
        pos_result += res1
        neg_result += res2
    result = pos_result - neg_result   # final score
    return result
def single_review_sentiment_score(comment_sent):
    """Score one comment.

    Each clause is segmented and stopword-filtered; dictionary sentiment
    words score 1 point each, with the words since the previous sentiment
    word adjusting the weight via match(), and an exclamation mark adding
    2 to the nearest preceding sentiment word.  Clause (pos, neg) pairs
    are totalled and the rounded difference is returned.

    :param comment_sent: raw comment text (str).
    :return: net sentiment score, rounded to 1 decimal place.
    """
    clause_scores = []
    for clause in tp.cut_sentence(comment_sent):
        tokens = tp.del_stopwords(tp.segmentation(clause))[:]
        pos_score = 0       # positive score of this clause
        neg_score = 0       # negative score of this clause
        window_start = 0    # index just past the last sentiment word

        for idx, token in enumerate(tokens):
            if token in posdict:
                pos_score += 1
                # apply modifiers seen since the previous sentiment word
                for modifier in tokens[window_start:idx]:
                    pos_score = match(modifier, pos_score)
                window_start = idx + 1
            elif token in negdict:
                neg_score += 1
                for modifier in tokens[window_start:idx]:
                    neg_score = match(modifier, neg_score)
                window_start = idx + 1
            elif token == "!" or token == "!":
                # exclamation mark: boost the nearest preceding sentiment
                # word by 2, scanning backwards, then stop
                for prev in reversed(tokens):
                    if prev in posdict:
                        pos_score += 2
                        break
                    if prev in negdict:
                        neg_score += 2
                        break

        clause_scores.append(transform_to_positive_num(pos_score, neg_score))

    # totals over all clauses
    pos_total = sum(p for p, _ in clause_scores)
    neg_total = sum(n for _, n in clause_scores)
    return round(pos_total - neg_total, 1)
예제 #5
0
def single_review_sentiment_score(weibo_sent):
    """Score one weibo post.

    The post is cut into clauses; inside each clause, posdict/negdict
    words score 1 point, the words between consecutive sentiment words
    adjust the weight via match(), and a trailing exclamation mark adds 2
    to the nearest preceding sentiment word.  Clause (pos, neg) pairs are
    totalled and the rounded difference is returned.

    :param weibo_sent: raw weibo text (str).
    :return: net sentiment score, rounded to 1 decimal place.
    """
    single_review_senti_score = []
    cuted_review = tp.cut_sentence(weibo_sent)  # split into clauses

    for sent in cuted_review:
        seg_sent = tp.segmentation(sent)   # segment words
        seg_sent = tp.del_stopwords(seg_sent)[:]
        i = 0    # current word location
        s = 0    # position just past the last sentiment word
        poscount = 0    # positive score of this clause
        negcount = 0    # negative score of this clause

        for word in seg_sent:
            if word in posdict:
                poscount += 1
                # modifiers since the previous sentiment word adjust the weight
                for w in seg_sent[s:i]:
                    poscount = match(w, poscount)
                s = i + 1

            elif word in negdict:
                negcount += 1
                for w in seg_sent[s:i]:
                    negcount = match(w, negcount)
                s = i + 1

            # An exclamation mark (ASCII or full-width) ends the clause:
            # boost the nearest preceding sentiment word by 2.
            # BUG FIX: the original compared against "!".decode("utf-8"),
            # which raises AttributeError on Python 3 (str has no .decode)
            # for every word that is neither positive nor negative.
            elif word == "!" or word == "!":
                for w2 in seg_sent[::-1]:
                    if w2 in posdict:
                        poscount += 2
                        break
                    elif w2 in negdict:
                        negcount += 2
                        break
            i += 1

        single_review_senti_score.append(transform_to_positive_num(poscount, negcount))
    pos_result, neg_result = 0, 0   # running totals over all clauses
    for res1, res2 in single_review_senti_score:
        pos_result += res1
        neg_result += res2
    result = pos_result - neg_result   # final score of this post
    result = round(result, 1)
    return result
예제 #6
0
파일: 小试验场.py 프로젝트: ron-tsai/-
                    if v == 'inverse':
                        v = -1
                        score = score * v
                    else:
                        score = score * v
                return score

        else:
            for i, v in enumerate(list):
                if v == 'inverse':
                    v = -1
                    score = score * v
                else:
                    score = score * v
            return score
# Smoke test: score a single sentence about riding the yield curve.
text = '骑乘收益率曲线策略是短期货币市场证券管理中流行的一种策略。'
single_review_senti_score = []
# Segment with jieba, then drop stopwords before matching.
cuted_review = tp.del_stopwords(list(jieba.cut(text)))
print(cuted_review)
count_list = []
for sent in cuted_review:
    count_list = match(sent, count_list)
score = mutiplication_list(count_list)

print(score)




예제 #7
0
파일: 草稿本.py 프로젝트: ron-tsai/-
import text_process as tp
import jieba
# Load the custom sentiment dictionaries into jieba's segmenter so that
# multi-character sentiment terms are kept as single tokens.
# FIX: raw strings replace the original literals, which mixed '\\' with
# bare '\' and relied on invalid escape sequences ('\p', '\c', ...) —
# a DeprecationWarning today and a SyntaxWarning on Python 3.12+.
# The resulting path values are byte-identical to the originals.
jieba.load_userdict(
    r'E:\postgraduate\no_space_environment\category\pycharm\pycharm_file_location\thesis\新闻文本处理\论文词典法CSDN\Sentiment_dict\emotion_dict\pos_all_dict.txt'
)
jieba.load_userdict(
    r'E:\postgraduate\no_space_environment\category\pycharm\pycharm_file_location\thesis\新闻文本处理\论文词典法CSDN\Sentiment_dict\emotion_dict\neg_all_dict.txt'
)
jieba.load_userdict(
    r'E:\postgraduate\no_space_environment\category\pycharm\pycharm_file_location\thesis\新闻文本处理\论文词典法CSDN\Sentiment_dict\degree_dict\insufficiently_inverse.txt'
)

# Smoke test: segment one sentence and show the stopword-filtered tokens.
news_sent = '我不是很看好这支股票会上涨。'
cuted_review = list(jieba.cut(news_sent))
print(cuted_review)
for sent in cuted_review:
    seg_sent = tp.segmentation(sent)  # further segmentation of each token
    seg_sent = tp.del_stopwords(seg_sent)[:]
    print(seg_sent)