def train_model(neg_path, pos_path):
    """Train the SnowNLP sentiment classifier and save it in the working directory.

    The previous body overwrote both parameters with ``neg.txt`` / ``pos.txt``
    in the current working directory, so whatever the caller passed was
    silently ignored. The arguments are now honoured; a falsy value
    (``None`` or ``""``) still falls back to the old default location.

    Args:
        neg_path: Path of the negative corpus, or a falsy value for
            ``<cwd>/neg.txt``.
        pos_path: Path of the positive corpus, or a falsy value for
            ``<cwd>/pos.txt``.

    Returns:
        The absolute path ``<cwd>/sentiment.marshal`` handed to
        ``sentiment.save``.
    """
    cwd = os.getcwd()
    neg_path = os.path.abspath(neg_path or os.path.join(cwd, 'neg.txt'))
    pos_path = os.path.abspath(pos_path or os.path.join(cwd, 'pos.txt'))
    mod_path = os.path.abspath(os.path.join(cwd, 'sentiment.marshal'))

    sentiment.train(neg_path, pos_path)
    sentiment.save(mod_path)
    return mod_path
def post(self, request):
    """Handle the ``submit`` and ``refresh`` POST actions of this view.

    ``submit``: scans the POST keys for names starting with ``"group"``; the
    text after the first five characters is parsed as the candidate id and
    the field value as the sentiment label. When a candidate row is found
    and the label is non-negative, the row is copied into the weibo table,
    removed from the candidate table, and the transaction is committed.

    ``refresh``: rebuilds ``neg_updated.txt`` / ``pos_updated.txt`` from the
    base corpora plus the labelled rows stored in the database, retrains the
    SnowNLP sentiment model and saves it as ``sentiment.marshal``.

    NOTE(review): the nesting below was reconstructed from statement order
    because the reviewed copy had lost its indentation -- confirm against
    the original file. With this layout a POST that carries neither key
    falls through and returns ``None``.
    """
    # Debug output of the raw form data.
    print(request.POST)
    # -1 marks "nothing valid parsed yet" for both values.
    cand_id = -1
    senti = -1
    if 'submit' in request.POST:
        for k in request.POST.keys():
            if k.startswith("group"):
                try:
                    # Key is "group" followed by the candidate id.
                    cand_id = int(k[5:])
                    senti = int(request.POST.get(k))
                except ValueError:
                    # Non-numeric id or value: reset both to the sentinel.
                    cand_id = -1
                    senti = -1
        if cand_id > 0:
            # NOTE(review): every query here is built with str.format();
            # weibo_url / weibo_content come from stored rows and are not
            # escaped. Switching to parameterized queries needs the SQL
            # templates, which are defined outside this method.
            self.cursor.execute(
                self.select_candidate_by_id.format(cand_id))
            cand_weibo = self.cursor.fetchone()
            if cand_weibo and senti >= 0:
                # Columns 1..3 of the candidate row are used as url, content
                # and source (per the local names; schema not visible here).
                weibo_url = cand_weibo[1]
                weibo_content = cand_weibo[2]
                weibo_wb_from = cand_weibo[3]
                weibo_sentiment = senti
                self.cursor.execute(
                    self.insert_into_weibo.format(weibo_url, weibo_content, weibo_wb_from, weibo_sentiment))
                self.cursor.execute(self.delete_candidate.format(cand_id))
                self.db.commit()
        return redirect('index')
    elif 'refresh' in request.POST:
        print('refresh')
        # Negative corpus: stripped lines of neg.txt followed by column 2 of
        # every row returned by the select_all_weibos query formatted with 0.
        with open('neg_updated.txt', 'w', encoding='utf-8') as neg_writer:
            with open('neg.txt', 'r', encoding='utf-8') as neg_reader:
                while True:
                    line = neg_reader.readline()
                    if not line:
                        break
                    line = line.strip()
                    neg_writer.write(line + '\n')
            self.cursor.execute(self.select_all_weibos.format(0))
            neg_results = self.cursor.fetchall()
            for neg_result in neg_results:
                neg_writer.write(neg_result[2].strip() + '\n')
        # Positive corpus: same procedure with pos.txt and the query
        # formatted with 1.
        with open('pos_updated.txt', 'w', encoding='utf-8') as pos_writer:
            with open('pos.txt', 'r', encoding='utf-8') as pos_reader:
                while True:
                    line = pos_reader.readline()
                    if not line:
                        break
                    line = line.strip()
                    pos_writer.write(line + '\n')
            self.cursor.execute(self.select_all_weibos.format(1))
            pos_results = self.cursor.fetchall()
            for pos_result in pos_results:
                pos_writer.write(pos_result[2].strip() + '\n')
        # Both writers are closed at this point, so the files are complete
        # on disk before training reads them.
        print("开始训练新模型...")
        sentiment.train('neg_updated.txt', 'pos_updated.txt')
        sentiment.save('sentiment.marshal')
        print("训练完成!")
        return redirect('index')
def train():
    """Train SnowNLP on the corpora under ``snownlp/sentiment`` next to this file.

    Reads ``neg.txt`` and ``pos.txt`` from ``<dir of this file>/snownlp/sentiment``
    and saves the trained model as ``sentiment.marshal`` in the same folder.

    Paths are assembled with ``os.path.join`` instead of hard-coded ``'\\'``
    concatenation, which only worked on Windows and produced a
    drive-root-relative path whenever ``os.path.dirname(__file__)`` was empty.
    """
    corpus_dir = os.path.join(os.path.dirname(__file__), 'snownlp', 'sentiment')
    neg = os.path.join(corpus_dir, 'neg.txt')
    pos = os.path.join(corpus_dir, 'pos.txt')
    parm = os.path.join(corpus_dir, 'sentiment.marshal')

    sentiment.train(neg, pos)
    sentiment.save(parm)
def save_train(request):
    """Export scored comments as training corpora, retrain SnowNLP, report success.

    Comments with ``is_scored=1`` are split on ``arti_score``: values above
    0.5 go to ``SGMW_pos.txt``, the rest to ``SGMW_neg.txt``. The model
    trained on those two files is saved as ``SGMW_sentiment.marshal``.

    Compared with the previous version, the files are written inside
    ``with`` blocks (so they are closed even if a write fails) and with an
    explicit UTF-8 encoding, which is the encoding SnowNLP uses to read its
    corpora. The written content is unchanged: each comment is preceded by
    a newline.

    Args:
        request: The incoming request; not inspected.

    Returns:
        ``JsonResponse({"msg": "success"})``.
    """
    sentiment_dir = (
        '/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/'
        'site-packages/snownlp/sentiment/'
    )
    pos_path = sentiment_dir + 'SGMW_pos.txt'
    neg_path = sentiment_dir + 'SGMW_neg.txt'
    model_path = sentiment_dir + 'SGMW_sentiment.marshal'

    high_processeds = Comments.objects.filter(arti_score__gt=0.5, is_scored=1)
    with open(pos_path, 'w', encoding='utf-8') as pos_file:
        pos_file.writelines('\n' + item.content for item in high_processeds)

    low_processeds = Comments.objects.filter(arti_score__lte=0.5, is_scored=1)
    with open(neg_path, 'w', encoding='utf-8') as neg_file:
        neg_file.writelines('\n' + item.content for item in low_processeds)

    sentiment.train(neg_path, pos_path)
    sentiment.save(model_path)
    return JsonResponse({"msg": "success"})
def train():
    """Train on ``target/neg.txt`` / ``target/pos.txt`` and save a timestamped model.

    The module-level ``start`` is set to the wall-clock time just before
    training begins. ``printPer`` is forwarded as the third argument of
    ``sentiment.train`` (presumably a progress callback that reads
    ``start`` -- confirm against its definition).
    """
    global start

    # Timestamp used in the output file name, e.g. "20240131 235959".
    stamp = datetime.datetime.now().strftime('%Y%m%d %H%M%S')
    print("prepare...")

    start = time.time()
    sentiment.train('target/neg.txt', 'target/pos.txt', printPer)

    model_file = 'target/sen %s.marshal' % stamp
    sentiment.save(model_file)
def train_and_test():
    """Train on the eastmoney corpora, save the model, then score one sample.

    Prints ``start`` / ``finish`` around training and finally the sentiment
    score SnowNLP assigns to a fixed sample sentence.
    """
    from snownlp import sentiment

    neg_corpus, pos_corpus = 'eastmoney_neg.txt', 'eastmoney_pos.txt'

    print("start")
    sentiment.train(neg_corpus, pos_corpus)
    print("finish")
    sentiment.save('sentiment.marshal')

    sample = SnowNLP("明天涨停")
    print(sample.sentiments)
def selfTrainSentiment(infile):
    """Retrain the sentiment model, score ``infile`` and plot the scores.

    ``sentiment.train`` takes the negative corpus first and the positive
    corpus second. The previous call passed ``("pos", "neg")``, which
    trained the classifier with the two labels swapped; the order is
    corrected here.

    Args:
        infile: Input passed unchanged to ``sentiment_analysis``.
    """
    sentiment.train("neg", "pos")
    sentiment.save('sentfwhyj.marshal')

    sentiments_list = sentiment_analysis(infile)

    # One blue dot per sample, x = sample index, y = score.
    x = range(len(sentiments_list))
    pl.plot(x, sentiments_list, 'b.')
    pl.xlabel('sample')
    pl.ylabel('score')
    pl.show()
def trainSentimentCorpus(self, negPath, posPath, target_encoding):
    """Train the sentiment corpus and save it as ``sentiment_Jxl_line.marshal``.

    Both corpus files are first passed through ``self.convertEncoding``
    with ``target_encoding`` (negative file first), then used for training.
    """
    for corpus_path in (negPath, posPath):
        self.convertEncoding(corpus_path, target_encoding)

    sentiment.train(neg_file=negPath, pos_file=posPath)

    model_stem = 'sentiment_Jxl_line'
    # Parenthesised print of a single value behaves the same on Python 2 and 3.
    print(u'数据训练完毕,即将保存{}.marshal文件'.format(model_stem))
    sentiment.save('{}.marshal'.format(model_stem))
    print(u'保存完毕!')
def train_snowNLP(table):
    """Build the corpora for ``table``, train SnowNLP and save ``<table>.marshal``.

    Steps, in order: run ``filter_comment.filter_opppsive_comments`` for the
    table, derive the positive and then the negative training file from the
    table's ``big_files`` folder via ``get_sentiment_file``, train, and save
    the model into the hard-coded SnowNLP sentiment folder.
    """
    filter_comment.filter_opppsive_comments(table)

    train_dir = FILE_PATH + 'train_files/'
    source_dir = DATA_PATH + table + '/big_files/'
    neg_file = train_dir + table + '_neg.txt'
    pos_file = train_dir + table + '_pos.txt'

    get_sentiment_file(source_dir + 'positive.txt', pos_file)
    get_sentiment_file(source_dir + 'negative.txt', neg_file)

    sentiment.train(neg_file, pos_file)

    model_dir = 'F:/computer_science/python3/lib/site-packages/snownlp/sentiment/'
    sentiment.save(model_dir + table + '.marshal')
def train():
    """Prepare corpora from ``raw_data.csv``, train SnowNLP, save ``mysentiment.marshal``.

    The non-null values of the ``评论内容`` column are handed to ``models``
    before training on ``neg.txt`` / ``pos.txt`` (presumably ``models``
    produces those two files -- confirm against its definition).

    Changes from the previous version:
      * ``df.fillna('nan')`` was removed: its result was discarded, so it
        only built and threw away a copy of the frame.
      * The closing message is built from adjacent literals instead of a
        backslash continuation inside the string, which leaked the
        continuation whitespace into the printed text.

    Raises:
        Exception: If ``raw_data.csv`` is not listed in ``dirlist``.
    """
    if 'raw_data.csv' not in dirlist:
        raise Exception('请先创建raw_data.csv文件')

    df = pd.read_csv('raw_data.csv')
    commands = df.评论内容.dropna().tolist()
    models(commands)

    sentiment.train('neg.txt', 'pos.txt')
    sentiment.save('mysentiment.marshal')
    print('得到模型后需拷贝到snownlp的sentiment文件夹下'
          '并修改__init.py__的路径来加载新权重')
def train_model():
    """Split the weibo_senti_100k data set, write the corpora and train SnowNLP.

    The rows are shuffled; the first 110000 form the training set and the
    remainder the test set (columns 0 and 1 only). Training reviews with
    label 0 are written to ``neg.csv``, those with label 1 to ``pos.csv``,
    and the test rows to ``test.csv``. The trained model is saved into the
    hard-coded SnowNLP package folder.
    """
    shuffled = pd.read_csv(
        r"./Train/weibo_senti_100k/weibo_senti_100k.csv", header=0
    ).sample(frac=1)

    train = shuffled.iloc[:110000, [0, 1]]
    test = shuffled.iloc[110000:, [0, 1]]

    # Second selected column holds the review text.
    reviews = train.iloc[:, 1]
    corpus_files = (
        (0, r"./Train/weibo_senti_100k/neg.csv"),
        (1, r"./Train/weibo_senti_100k/pos.csv"),
    )
    for label, target in corpus_files:
        reviews[train.label == label].to_csv(target, index=0, header=0)

    test.to_csv(
        r"./Train/weibo_senti_100k/test.csv",
        index=0,
        columns=['label', 'review'],
    )

    sentiment.train(
        r'./Train/weibo_senti_100k/neg.csv',
        r'./Train/weibo_senti_100k/pos.csv',
    )
    sentiment.save(
        r'C:/Users/RA1LGUN/Anaconda3/Lib/site-packages/snownlp/sentiment/newsentiment.marshal'
    )
def train_model():
    """Split ``DataSet.csv`` into train/test parts, write corpora, train SnowNLP.

    The first 40000 rows (columns 1 and 2) are the training set and the
    rest the test set. Training reviews labelled 0 go to ``neg.csv``, those
    labelled 1 to ``pos.csv``; the test rows go to ``TestModel.csv``. The
    trained model is saved over the hard-coded ``sentiment.marshal`` in the
    SnowNLP package folder.
    """
    frame = pd.read_csv(r"./DataSet.csv", header=0)
    split_at = 40000
    train = frame.iloc[:split_at, [1, 2]]
    test = frame.iloc[split_at:, [1, 2]]

    # Second selected column holds the review text.
    reviews = train.iloc[:, 1]
    for label, target in ((0, r"./neg.csv"), (1, r"./pos.csv")):
        reviews[train.label == label].to_csv(target, index=0, header=0)

    test.to_csv(r"./TestModel.csv", index=0, columns=['label', 'review'])

    sentiment.train(r'./neg.csv', r'./pos.csv')
    sentiment.save(
        r'C:/ProgramData/Miniconda3/Lib/site-packages/snownlp/sentiment/sentiment.marshal'
    )
def train():
    """Split the reviews into train/test files and train SnowNLP on the train part.

    Positive reviews are fetched, split and written before the negative
    ones; the model trained on ``neg_train`` / ``pos_train`` is saved to
    ``./train/sentiment.marshal``.
    """
    pos = get_pos_reviews()
    neg = get_neg_reviews()

    plan = (
        (pos, "./train/pos_train", "./train/pos_test"),
        (neg, "./train/neg_train", "./train/neg_test"),
    )
    for reviews, train_target, test_target in plan:
        train_part, test_part = split(reviews)
        write(train_part, train_target)
        write(test_part, test_target)

    sentiment.train("./train/neg_train", "./train/pos_train")
    sentiment.save('./train/sentiment.marshal')
def train_sentiment(use_all_data=True):
    """Train the model from the negative and positive corpus files.

    :param use_all_data: when true, train on the ``clean_*`` files and save
        ``impurity_classifier``; otherwise train on the ``train_*`` files
        and save ``train_impurity_classifier``.
    :return: None
    """
    # Parenthesised print of a single value behaves the same on Python 2 and 3.
    print('train model')

    if use_all_data:
        neg_corpus = '../data/clean_negative.txt'
        pos_corpus = '../data/clean_positive.txt'
        model_file = '../data/impurity_classifier'
    else:
        neg_corpus = '../data/train_negative.txt'
        pos_corpus = '../data/train_positive.txt'
        model_file = '../data/train_impurity_classifier'

    sentiment.train(neg_corpus, pos_corpus)
    sentiment.save(model_file)
def nlp(filepath, neg, pos):
    """Train on ``neg`` / ``pos`` and label every text in the Excel sheet.

    The first column of ``filepath`` is scored with SnowNLP; a score of at
    least 0.5 yields ``1``, anything lower ``-1``. The labels are stored in
    a new ``predict`` column and the frame is written to
    ``G:\\content_data.xlsx``.

    Args:
        filepath: Excel file whose first column holds the texts.
        neg: Negative corpus path for ``sentiment.train``.
        pos: Positive corpus path for ``sentiment.train``.

    Returns:
        The loaded frame including the ``predict`` column.
    """
    text = pd.read_excel(filepath)
    documents = text.iloc[:, 0].values.tolist()

    sentiment.train(neg, pos)

    scores = [SnowNLP(doc).sentiments for doc in documents]
    text['predict'] = [1 if score >= 0.5 else -1 for score in scores]

    text.to_excel('G:\\content_data.xlsx')
    return text
def trainEmotion():
    """Append labelled rows from the backup CSV to the corpora, then retrain.

    Each CSV row is ``(text, label)``. Rows labelled ``' 0'`` or ``' -1'``
    are appended to the ``neg.`` file and rows labelled ``' 1'`` to the
    ``pos.`` file, with embedded newlines removed so every sample occupies
    one line.

    All three files are now opened in a single ``with`` statement: the CSV
    handle used to be left open forever, and the two output handles leaked
    whenever a row raised.

    NOTE(review): training reads ``neg.txt`` / ``pos.txt`` from the venv's
    snownlp folder, not the ``neg.`` / ``pos.`` files appended to above --
    confirm this is intended.
    """
    with open('thuhole_ana/analysisExisted/neg.', 'a+',
              encoding='utf-8') as fn, \
            open('thuhole_ana/analysisExisted/pos.', 'a+',
                 encoding='utf-8') as fp, \
            open('thuhole_ana/analysisExisted/备份.csv', 'r',
                 encoding='utf-8') as source:
        for row in csv.reader(source):
            label = row[1]
            if label in (' 0', ' -1'):
                fn.write(row[0].replace('\n', '') + '\n')
            if label == ' 1':
                fp.write(row[0].replace('\n', '') + '\n')

    sentiment.train('venv/Lib/site-packages/snownlp/sentiment/neg.txt',
                    'venv/Lib/site-packages/snownlp/sentiment/pos.txt')
    sentiment.save(
        'venv/Lib/site-packages/snownlp/sentiment/sentiment.marshal2')
def f():
    """Train SnowNLP on ``neg.txt`` / ``pos.txt``, save next to this file, report timing.

    The model is written to ``seg.marshal`` in the directory of this source
    file. Start and end times are printed together with the elapsed whole
    seconds.

    The elapsed time now uses ``timedelta.total_seconds()``; the
    ``.seconds`` attribute used before drops whole days. The completion
    timestamp also reuses ``endTime`` instead of calling ``now()`` again.
    """
    import os
    from datetime import datetime

    from snownlp import sentiment  # sentiment-analysis module

    # Target path of the trained model.
    data_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                             'seg.marshal')
    print("data_path:" + data_path)

    startTime = datetime.now()
    print(startTime.strftime("%X") + " 开始训练")

    # Train on the corpora; extend the corpus files to improve the model.
    sentiment.train('neg.txt', 'pos.txt')
    # Persist the result so an unchanged corpus need not be retrained.
    # NOTE(review): SnowNLP only loads this file if the data_path in its
    # package __init__ points here. The original comment named
    # snownlp/seg/__init__.py, but a sentiment model is presumably loaded
    # by snownlp/sentiment/__init__.py -- confirm.
    sentiment.save(data_path)

    endTime = datetime.now()
    runTime = endTime - startTime
    elapsed_seconds = int(runTime.total_seconds())
    print(endTime.strftime("%X") + " 训练完毕,耗时:" + str(elapsed_seconds) + "秒")
def SnowNLP_TRAIN(TrainPath):
    """Build pos/neg corpora from rated reviews and retrain the SnowNLP model.

    Reviews rated 4 or higher are written to ``pos.txt``, reviews rated 2
    or lower to ``neg.txt``; ratings in between are skipped. The model
    trained on those files overwrites the hard-coded ``sentiment.marshal``.

    The corpus files used to be opened with bare ``open()`` and never
    closed, so buffered lines could still be unwritten when
    ``sentiment.train`` read the files. They are now written inside a
    ``with`` block that closes them before training. The spreadsheet is
    also read before the corpora are truncated, so a failed read no longer
    empties existing corpus files.

    Args:
        TrainPath: Prefix (directory path including its trailing separator)
            to which the spreadsheet file name is appended.
    """
    # 0. Locations of the corpora and of the model to replace.
    pospath = "D:\\Users\\Musk18\\Desktop\\数据挖掘课设\\pos.txt"
    negpath = "D:\\Users\\Musk18\\Desktop\\数据挖掘课设\\neg.txt"
    sentimentpath = "F:/ProgramData/Anaconda3/envs/untitled2/Lib/site-packages/snownlp/sentiment/sentiment.marshal"

    # 1. Split the scraped reviews into positive / negative samples.
    df = pd.read_excel(TrainPath + '评分和影评.xls')
    with open(pospath, 'w', encoding='utf-8') as posfile, \
            open(negpath, 'w', encoding='utf-8') as negfile:
        for rating, review in zip(df['评分'], df['评论']):
            if rating >= 4:
                posfile.write(str(review) + '\n')
            elif rating <= 2:
                negfile.write(str(review) + '\n')

    # 2. Train a new model on the files, which are closed by now.
    sentiment.train(negpath, pospath)
    # 3. Save it over the packaged model.
    sentiment.save(sentimentpath)
    print('训练完毕!模型已替换!')
def train_model(text_set,train_frequency):
    '''
    Self-train the sentiment model for several rounds.

    In every round each text is reduced to its runs of CJK characters
    (joined with commas) and scored with the current model. Scores above
    0.8 are appended to ``pos.txt`` and scores below 0.3 to ``neg.txt``;
    the model is then retrained on both files and saved, so the next round
    scores with the updated classifier.

    Changes from the previous version: every text is scored once instead
    of twice (``sentiments`` classifies on each access), and each corpus
    file is opened at most once per round instead of once per text. A file
    is still only touched when there is something to append.

    :param text_set: collection of texts; iterated once per round, so a
        one-shot iterator only feeds the first round
    :param train_frequency: number of training rounds
    :return: None
    '''
    han_runs = re.compile("([\u4E00-\u9FA5]+)")

    for i in range(1,train_frequency + 1):
        print('开始第{}次训练'.format(i))
        pos_lines = []
        neg_lines = []
        for text in text_set:
            sub_text = ','.join(han_runs.findall(text))
            score = SnowNLP(sub_text).sentiments
            if score > 0.8:
                pos_lines.append(sub_text + "\n")
            elif score < 0.3:
                neg_lines.append(sub_text + "\n")

        if pos_lines:
            with open('pos.txt', mode='a', encoding='utf-8') as g:
                g.writelines(pos_lines)
        if neg_lines:
            with open('neg.txt', mode='a', encoding='utf-8') as f:
                f.writelines(neg_lines)

        # Both files are closed here, so training sees the appended lines.
        sentiment.train('neg.txt', 'pos.txt')
        sentiment.save('sentiment.marshal')
def train_model():
    """Train SnowNLP on the corpora under ``/home/hadoopnew`` and save the model.

    The training data is not part of the repository; it has to be obtained
    separately (lab members can copy it from the lab machines).
    """
    from snownlp import sentiment

    neg_corpus = '/home/hadoopnew/neg.txt'
    pos_corpus = '/home/hadoopnew/pos.txt'
    sentiment.train(neg_corpus, pos_corpus)
    sentiment.save('sentiment.marshal_knee')
def train(path):
    """Train on the negative and positive review sets in ``path`` and save the model.

    The negative corpus is ``<path>/差评.csv``, the positive one
    ``<path>/好评.csv``; the model is saved as ``./sentiment.marshal``.
    """
    neg_corpus = '{}/差评.csv'.format(path)
    pos_corpus = '{}/好评.csv'.format(path)
    sentiment.train(neg_corpus, pos_corpus)
    sentiment.save('./sentiment.marshal')
def train_material():
    """Train SnowNLP on ``neg.txt`` / ``pos.txt`` and save ``sentiment.marshal``.

    All three paths are relative to the current working directory.
    """
    corpora = ('neg.txt', 'pos.txt')
    sentiment.train(*corpora)
    sentiment.save('sentiment.marshal')
# coding: utf-8
"""Train the SnowNLP sentiment model from the corpora shipped next to this file.

Reads ``model/sentiment/neg.txt`` and ``model/sentiment/pos.txt`` relative
to this file's directory and writes ``model/sentiment/sentiment.marshal``
there.
"""
from snownlp import SnowNLP,sentiment
import os.path

# Everything is resolved relative to this file, not the working directory.
base = os.path.dirname(__file__)

neg = os.path.join(base,'model/sentiment/neg.txt')
pos = os.path.join(base,'model/sentiment/pos.txt')
tagdest = os.path.join(base,'model/sentiment/sentiment.marshal')

sentiment.train(neg,pos)
sentiment.save(tagdest)
#-*-coding:utf-8-*-
"""Train a custom SnowNLP sentiment model from ``neg.txt`` / ``pos.txt``.

Both corpora are plain text with one sample per line and must be saved as
UTF-8 (most editors offer the encoding in their "save as" dialog).
"""
from snownlp import sentiment

# Negative corpus first, positive corpus second.
neg_corpus, pos_corpus = 'neg.txt', 'pos.txt'
sentiment.train(neg_corpus, pos_corpus)

# Write the trained model file.
sentiment.save('my_sentiment.marshal')

# After training, copy the generated file into the package folder:
#   D:\Python2.7\Lib\site-packages\snownlp\sentiment
# and change data_path in
#   D:\Python2.7\Lib\site-packages\snownlp\sentiment\__init__.py
# On macOS the folder is:
#   /Library/Python/2.7/site-packages/snownlp/sentiment
"""Self-label weibo posts with the current model, then retrain on the result.

Every post's ``正文`` field is scored. Posts scoring below 0.25 are appended
to ``neg.txt``, posts scoring above 0.8 to ``pos.txt``, and every post is
logged with its score to ``commentqinggan.txt``. Finally a new model is
trained on the two corpora and saved as ``sentiment.marshal``.

Changes from the previous version: all files are opened in one ``with``
block and are therefore closed (and flushed) before training reads them;
the log file is opened once instead of once per row without ever being
closed; each post is scored once instead of up to three times.
"""
from snownlp import SnowNLP
from snownlp import sentiment
import csv

with open('微博信息20191208-20200122.csv', 'r', encoding='utf-8') as f, \
        open('./pos.txt', 'a+', encoding='utf-8') as f1, \
        open('./neg.txt', 'a+', encoding='utf-8') as f2, \
        open('commentqinggan.txt', mode='a', encoding='utf-8') as mytxt:
    # f1 collects positive samples, f2 negative ones.
    reader = csv.DictReader(f)
    for i in reader:
        body = i['正文']
        score = SnowNLP(body).sentiments
        if score < 0.25:
            f2.write(body)
            f2.write('\n')
        if score > 0.8:
            f1.write(body)
            f1.write('\n')
        print(body, score, file=mytxt)

# Train on the updated corpora and save the new model.
sentiment.train('neg.txt', 'pos.txt')
sentiment.save('sentiment.marshal')
def train_my_data():
    """Retrain SnowNLP on the corpora in ``Data/Output`` and save the model there."""
    neg_corpus = r'Data/Output/neg.txt'
    pos_corpus = r'Data/Output/pos.txt'

    # Retrain on the project's own corpora ...
    sentiment.train(neg_corpus, pos_corpus)
    # ... and keep the freshly trained model next to them.
    sentiment.save(r'Data/Output/sentiment.marshal')
"""Train on the low/high corpora and measure agreement with the ratings in mix.csv.

A row counts as correct when the SnowNLP score and the ``fen`` rating fall
on the same side of their midpoints (0.5 and 2.5); ties on either value
count as wrong. Each ``(score, rating)`` pair is printed, followed by the
overall accuracy.
"""
import pandas as pd
from snownlp import SnowNLP, sentiment

sentiment.train('E:\\data\\low.txt','E:\\data\\high.txt')

# Rows with missing values are dropped before scoring.
mix = pd.read_csv('E:\\data\\mix.csv',encoding='gbk').dropna()

right = 0
wrong = 0
for i in mix.index:
    txt = mix.loc[i,'txt']
    fen = mix.loc[i,'fen']
    s = SnowNLP(txt)
    f = s.sentiments
    print((f,fen))
    both_high = f > 0.5 and fen > 2.5
    both_low = f < 0.5 and fen < 2.5
    if both_high or both_low:
        right += 1
    else:
        wrong += 1

print(right/(right+wrong))
def TrainAndSave(negfile, posfile):
    """Train SnowNLP on the two corpus files and save ``sentiment.marshal``.

    Args:
        negfile: Path of the negative corpus.
        posfile: Path of the positive corpus.
    """
    model_file = 'sentiment.marshal'
    sentiment.train(negfile, posfile)
    sentiment.save(model_file)
def train():
    """Retrain on SnowNLP's packaged corpora and save ``sentiment2.marshal`` beside them."""
    neg_corpus = 'F:/Anaconda/Lib/site-packages/snownlp/sentiment/neg.txt'
    pos_corpus = 'F:/Anaconda/Lib/site-packages/snownlp/sentiment/pos.txt'
    model_file = 'F:/Anaconda/Lib/site-packages/snownlp/sentiment/sentiment2.marshal'

    sentiment.train(neg_corpus, pos_corpus)
    sentiment.save(model_file)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Train a SnowNLP sentiment model from the negative and positive dictionaries.

Created on Thu Jan 4 09:59:46 2018

@author: Ming JIN
"""
from snownlp import sentiment

# Negative dictionary first, positive dictionary second.
neg_dict, pos_dict = 'negative_dict.txt', 'positive_dict.txt'
sentiment.train(neg_dict, pos_dict)
sentiment.save('sentiment.marshal')
# 去除英文字符和数字 clean_txt=re.sub(r'[A-Za-z\d]*','',clean_txt) # 对文本进行分词 clean_list=clean_txt.split('\n') return clean_list if __name__ == '__main__': ##################################################################### print("正在加载训练集...") # 必须传入positive.txt和negative.txt sentiment.train('./../../Resources/sentiment_folders/hotel/positive.txt', './../../Resources/sentiment_folders/hotel/neg.txt') # 修改 sentiment.save('sentiment.marshal') ############################################# # # 测试的json文件 # filename='./../../Resources/jsonfiles/ChnSentiCorp.json' # 修改 # type_list,content_list=file_op.readfile(filename) ############################################### # 知乎的评论内容作为测试集 comment_file='./../../Resources/CutWordPath/sentiment_comment.txt' content_list=Read_comment_file(comment_file) ################################################### # 进行snownlp情感分析 sentences,sentences_score=sentiment_snownlp(content_list)
"""Train SnowNLP, label every post in the spreadsheet and report the accuracy.

The first spreadsheet column holds the text and the second the true label.
A score of at least 0.6 is predicted as ``1``, anything lower as ``-1``;
the predictions are stored in a new ``predict`` column.

Fixes over the previous version:
  * ``i.decode('utf-8')`` raised ``AttributeError`` on Python 3, where the
    cells are already ``str``; only real ``bytes`` values are decoded now.
  * The accuracy line passed the value as a second ``print`` argument, so
    the ``%f`` placeholder was printed literally; it is now formatted
    with ``%``.
"""
import sys

import pandas as pd  # spreadsheet loading
from snownlp import sentiment  # sentiment-analysis module
from snownlp import SnowNLP

# Load the data; the first row is the header.
text = pd.read_excel(u'D:/自然语言处理/川大相关微博内容.xlsx', header=0)
text0 = text.iloc[:, 0]  # all texts
# Cells come back as generic objects; normalise every one to str.
text1 = [i.decode('utf-8') if isinstance(i, bytes) else str(i) for i in text0]

# Train on the corpora; adjust the paths to the local installation.
sentiment.train('D:/Anaconda3/Lib/site-packages/snownlp/sentiment/neg.txt',
                'D:/Anaconda3/Lib/site-packages/snownlp/sentiment/pos.txt')
# Save the result so an unchanged corpus need not be retrained.
sentiment.save('D:/pyscript/sentiment.marshal')

# Score every post.
senti = [SnowNLP(i).sentiments for i in text1]
newsenti = [1 if i >= 0.6 else -1 for i in senti]

# Column 0 is the text, column 1 the true label, column 2 the prediction
# (this assumes the sheet has exactly two columns before 'predict' is added).
text['predict'] = newsenti

# A prediction is correct when it equals the true label.
counts = int((text.iloc[:, 2] == text.iloc[:, 1]).sum())
print("准确率为:%f" % (float(counts) / float(len(text))))