def train():
    """Train the sentiment model from the corpus stored next to this file.

    Reads ``neg.txt`` and ``pos.txt`` from the ``snownlp/sentiment``
    directory under this module's directory, trains on them, and saves the
    result as ``sentiment.marshal`` in that same directory.
    """
    # os.path.join picks the platform separator; the previous hard-coded
    # '\\' only resolved on Windows and produced a root-relative path when
    # dirname(__file__) was empty.
    corpus_dir = os.path.join(os.path.dirname(__file__), 'snownlp', 'sentiment')
    neg = os.path.join(corpus_dir, 'neg.txt')
    pos = os.path.join(corpus_dir, 'pos.txt')
    parm = os.path.join(corpus_dir, 'sentiment.marshal')
    sentiment.train(neg, pos)
    sentiment.save(parm)
Exemple #2
0
def save_train(request):
    """Rebuild the SGMW corpus files from scored comments and retrain.

    Comments with ``is_scored=1`` are split on ``arti_score``: values above
    0.5 go to ``SGMW_pos.txt``, the rest to ``SGMW_neg.txt``. Each comment's
    ``content`` is written preceded by a newline. The model is then trained
    on both files and saved as ``SGMW_sentiment.marshal``.

    :param request: the incoming request (not read by this function).
    :return: a ``JsonResponse`` with ``{"msg": "success"}``.
    """
    corpus_dir = ('/Library/Frameworks/Python.framework/Versions/3.7/lib/'
                  'python3.7/site-packages/snownlp/sentiment/')
    pos_path = corpus_dir + 'SGMW_pos.txt'
    neg_path = corpus_dir + 'SGMW_neg.txt'
    model_path = corpus_dir + 'SGMW_sentiment.marshal'

    # ``with`` guarantees the files are closed (and flushed) even if a write
    # fails. The encoding is pinned to UTF-8 because SnowNLP reads its
    # training corpora as UTF-8.
    with open(pos_path, 'w', encoding='utf-8') as pos_file:
        for comment in Comments.objects.filter(arti_score__gt=0.5, is_scored=1):
            content = comment.content
            pos_file.write('\n')
            pos_file.write(content)

    with open(neg_path, 'w', encoding='utf-8') as neg_file:
        for comment in Comments.objects.filter(arti_score__lte=0.5, is_scored=1):
            content = comment.content
            neg_file.write('\n')
            neg_file.write(content)

    sentiment.train(neg_path, pos_path)
    sentiment.save(model_path)

    return JsonResponse({"msg": "success"})
Exemple #3
0
def train():
    """Train on ``target/neg.txt`` / ``target/pos.txt`` and save a timestamped model.

    Sets the module-level ``start`` to the time training began. The model is
    written to ``target/sen <YYYYmmdd HHMMSS>.marshal``.
    """
    global start
    # Timestamp used in the saved model's file name.
    stamp = datetime.datetime.now().strftime('%Y%m%d %H%M%S')
    print("prepare...")
    start = time.time()
    # NOTE(review): the third argument (printPer) looks like a progress
    # callback accepted by a customised train(); confirm against the
    # sentiment module this file imports.
    sentiment.train('target/neg.txt', 'target/pos.txt', printPer)
    model_path = 'target/sen %s.marshal' % stamp
    sentiment.save(model_path)
Exemple #4
0
def train_model(neg_path, pos_path):
    """Train the sentiment model and save it in the current working directory.

    :param neg_path: path to the negative corpus; when falsy, ``neg.txt`` in
        the current working directory is used.
    :param pos_path: path to the positive corpus; when falsy, ``pos.txt`` in
        the current working directory is used.
    :return: absolute path passed to ``sentiment.save``
        (``sentiment.marshal`` in the current working directory).
    """
    cwd = os.getcwd()
    # The arguments used to be overwritten unconditionally, so callers'
    # paths were silently ignored. Honour them, keeping the old cwd-based
    # locations as the fallback. Relative names such as 'neg.txt' still
    # resolve to the same absolute path as before.
    neg_path = os.path.abspath(neg_path or os.path.join(cwd, 'neg.txt'))
    pos_path = os.path.abspath(pos_path or os.path.join(cwd, 'pos.txt'))
    mod_path = os.path.abspath(os.path.join(cwd, 'sentiment.marshal'))
    sentiment.train(neg_path, pos_path)
    sentiment.save(mod_path)
    return mod_path
Exemple #5
0
 def post(self, request):
     """Handle a POST that either labels a candidate weibo or retrains the model.

     ``submit``: looks for form keys starting with ``group``; the rest of the
     key is parsed as the candidate id and the value as the sentiment label.
     When both are valid and the candidate exists, it is inserted into the
     weibo table, removed from the candidates, and the change is committed.

     ``refresh``: writes ``neg_updated.txt`` / ``pos_updated.txt`` from the
     base corpora plus the labelled weibos, retrains and saves the model.

     Both branches end with ``redirect('index')``; any other POST returns
     ``None``.
     """
     form = request.POST
     print(form)
     cand_id = senti = -1

     if 'submit' in form:
         for key in form.keys():
             if not key.startswith("group"):
                 continue
             try:
                 cand_id = int(key[5:])
                 senti = int(form.get(key))
             except ValueError:
                 # Either part unparsable: discard both.
                 cand_id = senti = -1

         if cand_id > 0:
             # NOTE(review): the SQL statements are built with str.format;
             # if the templates interpolate raw values this is injectable.
             # Confirm against the template definitions.
             self.cursor.execute(
                 self.select_candidate_by_id.format(cand_id))
             row = self.cursor.fetchone()
             if row and senti >= 0:
                 insert_sql = self.insert_into_weibo.format(
                     row[1], row[2], row[3], senti)
                 self.cursor.execute(insert_sql)
                 self.cursor.execute(self.delete_candidate.format(cand_id))
                 self.db.commit()
         return redirect('index')

     if 'refresh' in form:
         print('refresh')
         # (output file, base corpus, label passed to select_all_weibos)
         corpora = (
             ('neg_updated.txt', 'neg.txt', 0),
             ('pos_updated.txt', 'pos.txt', 1),
         )
         for updated_name, base_name, label in corpora:
             with open(updated_name, 'w', encoding='utf-8') as writer:
                 # Copy the base corpus, one stripped line per row.
                 with open(base_name, 'r', encoding='utf-8') as reader:
                     for line in reader:
                         writer.write(line.strip() + '\n')
                 # Append the labelled weibos (column 2 of each row).
                 self.cursor.execute(self.select_all_weibos.format(label))
                 for record in self.cursor.fetchall():
                     writer.write(record[2].strip() + '\n')
         print("开始训练新模型...")
         sentiment.train('neg_updated.txt', 'pos_updated.txt')
         sentiment.save('sentiment.marshal')
         print("训练完成!")
         return redirect('index')
def selfTrainSentiment(infile):
    """Retrain the sentiment model, score ``infile`` and plot the scores.

    Trains on the files named ``neg`` and ``pos``, saves the model as
    ``sentfwhyj.marshal``, then plots one blue dot per score returned by
    ``sentiment_analysis(infile)``.

    :param infile: input passed straight to ``sentiment_analysis``.
    """
    # sentiment.train expects (neg_file, pos_file); the arguments used to be
    # passed as ("pos", "neg"), which trains the model with inverted labels.
    sentiment.train("neg", "pos")
    sentiment.save('sentfwhyj.marshal')
    sentiments_list = sentiment_analysis(infile)
    x = range(len(sentiments_list))
    pl.plot(x, sentiments_list, 'b.')
    pl.xlabel('sample')
    pl.ylabel('score')
    pl.show()
Exemple #7
0
def train_and_test():
    """Train on the eastmoney corpora, save the model, and print one sample score."""
    from snownlp import sentiment

    neg_corpus = 'eastmoney_neg.txt'
    pos_corpus = 'eastmoney_pos.txt'

    print("start")
    sentiment.train(neg_corpus, pos_corpus)
    print("finish")
    sentiment.save('sentiment.marshal')

    # Smoke test: score one short phrase and print the result.
    print(SnowNLP("明天涨停").sentiments)
 def trainSentimentCorpus(self, negPath, posPath, target_encoding):
     """Train the sentiment corpus and save the resulting model.

     Both corpus files are first passed through ``self.convertEncoding``
     with ``target_encoding``. The model is then trained on them and saved
     as ``sentiment_Jxl_line.marshal``.

     :param negPath: path to the negative corpus file.
     :param posPath: path to the positive corpus file.
     :param target_encoding: encoding handed to ``self.convertEncoding``.
     """
     self.convertEncoding(negPath, target_encoding)
     self.convertEncoding(posPath, target_encoding)
     sentiment.train(neg_file=negPath, pos_file=posPath)
     path_name = 'sentiment_Jxl_line'
     # print(...) with a single argument prints the same text on Python 2
     # and Python 3; the bare print statement is a syntax error on Python 3.
     print(u'数据训练完毕,即将保存{}.marshal文件'.format(path_name))
     sentiment.save('{}.marshal'.format(path_name))
     print(u'保存完毕!')
def train_snowNLP(table):
    """Build the training files for ``table``, then train and save its model.

    :param table: name used to locate the source data under ``DATA_PATH``
        and to name the generated corpus and model files.
    """
    filter_comment.filter_opppsive_comments(table)

    neg_file = FILE_PATH + 'train_files/' + table + '_neg.txt'
    pos_file = FILE_PATH + 'train_files/' + table + '_pos.txt'

    # Convert the raw positive, then negative, files into corpus files.
    conversions = (
        (DATA_PATH + table + '/big_files/' + 'positive.txt', pos_file),
        (DATA_PATH + table + '/big_files/' + 'negative.txt', neg_file),
    )
    for raw_path, corpus_path in conversions:
        get_sentiment_file(raw_path, corpus_path)

    sentiment.train(neg_file, pos_file)
    model_dir = 'F:/computer_science/python3/lib/site-packages/snownlp/sentiment/'
    sentiment.save(model_dir + table + '.marshal')
Exemple #10
0
def train():
    """Build the corpora from ``raw_data.csv``, then train and save the model.

    Reads the ``评论内容`` column, drops missing values and passes the
    remaining comments to ``models``. It then trains on ``neg.txt`` /
    ``pos.txt`` and saves the model as ``mysentiment.marshal``.

    :raises Exception: when ``raw_data.csv`` is not listed in ``dirlist``.
    """
    if 'raw_data.csv' not in dirlist:
        raise Exception('请先创建raw_data.csv文件')
    df = pd.read_csv('raw_data.csv')
    # dropna() removes the missing comments. The former ``df.fillna('nan')``
    # call discarded its result (fillna is not in-place by default), so it
    # only cost a full copy and has been removed.
    commands = df.评论内容.dropna().tolist()
    models(commands)
    sentiment.train('neg.txt', 'pos.txt')
    sentiment.save('mysentiment.marshal')
    # Adjacent literals instead of a backslash continuation, which embedded
    # the source indentation in the printed message. The file name is also
    # corrected to __init__.py.
    print('得到模型后需拷贝到snownlp的sentiment文件夹下'
          '并修改__init__.py的路径来加载新权重')
Exemple #11
0
def train_model():
    """Shuffle the weibo_senti_100k data, split it, and train on the training part.

    The first 110000 shuffled rows form the training set and the rest the
    test set. Training reviews are written to ``neg.csv`` / ``pos.csv`` by
    label, and the test rows to ``test.csv``. The model is trained on the
    two corpus files and saved as ``newsentiment.marshal``.
    """
    neg_csv = r"./Train/weibo_senti_100k/neg.csv"
    pos_csv = r"./Train/weibo_senti_100k/pos.csv"

    shuffled = pd.read_csv(
        r"./Train/weibo_senti_100k/weibo_senti_100k.csv", header=0,
    ).sample(frac=1)

    train = shuffled.iloc[:110000, [0, 1]]
    test = shuffled.iloc[110000:, [0, 1]]

    # Second selected column holds the text; split it by label.
    reviews = train.iloc[:, 1]
    negatives = reviews[train.label == 0]
    positives = reviews[train.label == 1]

    negatives.to_csv(neg_csv, index=0, header=0)
    positives.to_csv(pos_csv, index=0, header=0)
    test.to_csv(r"./Train/weibo_senti_100k/test.csv", index=0,
                columns=['label', 'review'])

    sentiment.train(r'./Train/weibo_senti_100k/neg.csv',
                    r'./Train/weibo_senti_100k/pos.csv')
    sentiment.save(
        r'C:/Users/RA1LGUN/Anaconda3/Lib/site-packages/snownlp/sentiment/newsentiment.marshal'
    )
Exemple #12
0
def train_model():
    """Split ``DataSet.csv`` into train/test parts and train on the training part.

    The first 40000 rows form the training set and the rest the test set.
    Training reviews are written to ``neg.csv`` / ``pos.csv`` by label, and
    the test rows to ``TestModel.csv``. The model is trained on the two
    corpus files and saved over the installed ``sentiment.marshal``.
    """
    dataset = pd.read_csv(r"./DataSet.csv", header=0)

    train = dataset.iloc[:40000, [1, 2]]
    test = dataset.iloc[40000:, [1, 2]]

    # Second selected column holds the text; split it by label.
    reviews = train.iloc[:, 1]
    negatives = reviews[train.label == 0]
    positives = reviews[train.label == 1]

    negatives.to_csv(r"./neg.csv", index=0, header=0)
    positives.to_csv(r"./pos.csv", index=0, header=0)
    test.to_csv(r"./TestModel.csv", index=0, columns=['label', 'review'])

    sentiment.train(r'./neg.csv', r'./pos.csv')
    model_path = r'C:/ProgramData/Miniconda3/Lib/site-packages/snownlp/sentiment/sentiment.marshal'
    sentiment.save(model_path)
Exemple #13
0
def train():
    """Split the positive and negative reviews, write the parts, and train.

    Each review set is split with ``split`` and both parts are written under
    ``./train``. The model is trained on the two training parts and saved as
    ``./train/sentiment.marshal``.
    """
    # (reviews, train output path, test output path); positive set first.
    jobs = (
        (get_pos_reviews(), "./train/pos_train", "./train/pos_test"),
        (get_neg_reviews(), "./train/neg_train", "./train/neg_test"),
    )
    for reviews, train_path, test_path in jobs:
        train_part, test_part = split(reviews)
        write(train_part, train_path)
        write(test_part, test_path)

    sentiment.train("./train/neg_train", "./train/pos_train")
    sentiment.save('./train/sentiment.marshal')
def train_sentiment(use_all_data=True):
    """Train the classifier from the negative and positive corpus files.

    :param use_all_data: when true, train on the ``clean_*`` files and save
        to ``../data/impurity_classifier``; otherwise train on the
        ``train_*`` files and save to ``../data/train_impurity_classifier``.
    :return: None
    """
    # print(...) with a single argument prints the same text on Python 2 and
    # Python 3; the bare print statement is a syntax error on Python 3.
    print('train model')

    if use_all_data:
        sentiment.train('../data/clean_negative.txt', '../data/clean_positive.txt')
        sentiment.save('../data/impurity_classifier')
    else:
        sentiment.train('../data/train_negative.txt', '../data/train_positive.txt')
        sentiment.save('../data/train_impurity_classifier')
def trainEmotion():
    """Append labelled rows from the backup CSV to the corpus files, then train.

    For each CSV row, column 0 (with embedded newlines removed) is appended
    to ``neg.`` when column 1 is ``' 0'`` or ``' -1'``, and to ``pos.`` when
    it is ``' 1'``. The model is then trained on the ``neg.txt`` / ``pos.txt``
    files under ``venv/Lib/site-packages/snownlp/sentiment`` and saved as
    ``sentiment.marshal2`` there.
    """
    base = 'thuhole_ana/analysisExisted/'
    # One ``with`` closes all three handles, including the CSV source that
    # was previously never closed, even if a row fails to process.
    with open(base + 'neg.', 'a+', encoding='utf-8') as fn, \
            open(base + 'pos.', 'a+', encoding='utf-8') as fp, \
            open(base + '备份.csv', 'r', encoding='utf-8') as source:
        for row in csv.reader(source):
            label = row[1]
            if label in (' 0', ' -1'):
                fn.write(row[0].replace('\n', '') + '\n')
            elif label == ' 1':
                fp.write(row[0].replace('\n', '') + '\n')

    # NOTE(review): training reads the corpus under venv/..., not the
    # 'neg.' / 'pos.' files appended to above. Confirm this is intended.
    sentiment.train('venv/Lib/site-packages/snownlp/sentiment/neg.txt',
                    'venv/Lib/site-packages/snownlp/sentiment/pos.txt')
    sentiment.save(
        'venv/Lib/site-packages/snownlp/sentiment/sentiment.marshal2')
Exemple #16
0
def f():
    """Train on ``neg.txt`` / ``pos.txt``, save the model, and print the elapsed time.

    The model is saved as ``seg.marshal`` in this file's directory.
    """
    # Target model path.
    import os
    here = os.path.dirname(os.path.abspath(__file__))
    data_path = os.path.join(here, 'seg.marshal')
    print("data_path:" + data_path)

    # Training.
    from snownlp import sentiment  # sentiment-analysis module
    from datetime import datetime
    started = datetime.now()
    print(datetime.now().strftime("%X") + " 开始训练")
    # Train on the corpus; the corpus can be extended later.
    sentiment.train('neg.txt', 'pos.txt')
    # Save the result so an unchanged corpus need not be retrained. The
    # original note adds that `data_path` in `snownlp/seg/__init__.py` must
    # point at the saved file, otherwise the default model is still loaded.
    sentiment.save(data_path)
    finished = datetime.now()
    elapsed = finished - started
    print(datetime.now().strftime("%X") + " 训练完毕,耗时:" + str(elapsed.seconds) +
          "秒")
def SnowNLP_TRAIN(TrainPath):
    """Split rated reviews into positive/negative corpora, then retrain the model.

    Reads ``评分和影评.xls`` under ``TrainPath``. Reviews rated 4 or higher are
    written to ``pos.txt``, those rated 2 or lower to ``neg.txt``, and the
    rest are skipped. The model is trained on both files and saved over the
    installed ``sentiment.marshal``.

    :param TrainPath: prefix prepended to the spreadsheet file name.
    """
    # 0. Corpus and model locations.
    pospath = "D:\\Users\\Musk18\\Desktop\\数据挖掘课设\\pos.txt"
    negpath = "D:\\Users\\Musk18\\Desktop\\数据挖掘课设\\neg.txt"
    sentimentpath = "F:/ProgramData/Anaconda3/envs/untitled2/Lib/site-packages/snownlp/sentiment/sentiment.marshal"
    # 1. Split the scraped reviews by rating into the two corpus files.
    # Read the spreadsheet first so a read failure no longer leaves the
    # corpus files truncated.
    df = pd.read_excel(TrainPath + '评分和影评.xls')
    # The files must be closed (and so flushed) before training reads them.
    # They used to stay open, so buffered lines could be missing from the
    # corpus.
    with open(pospath, 'w', encoding='utf-8') as posfile, \
            open(negpath, 'w', encoding='utf-8') as negfile:
        for score, comment in zip(df['评分'], df['评论']):
            if score >= 4:
                posfile.write(str(comment) + '\n')
            elif score <= 2:
                negfile.write(str(comment) + '\n')
    # 2. Train the new model.
    sentiment.train(negpath, pospath)
    # 3. Save the newly trained model.
    sentiment.save(sentimentpath)
    print('训练完毕!模型已替换!')
def train_model(text_set,train_frequency):
    '''
    Label texts with the current model and retrain on the labelled lines.

    :param text_set: collection of texts
    :param train_frequency: number of training rounds
    :return:
    '''
    # A score above 0.8 counts as positive and one below 0.3 as negative;
    # anything in between is skipped.
    for round_index in range(train_frequency):
        print('开始第{}次训练'.format(round_index + 1))
        for text in text_set:
            # Keep only the runs matched by the pattern, comma-joined.
            sub_text = ','.join(re.findall("([\u4E00-\u9FA5]+)", text))
            rating = SnowNLP(sub_text).sentiments
            if rating > 0.8:
                target = 'pos.txt'
            elif rating < 0.3:
                target = 'neg.txt'
            else:
                continue
            with open(target, mode='a', encoding='utf-8') as corpus:
                corpus.write(sub_text + "\n")
        sentiment.train('neg.txt', 'pos.txt')
        sentiment.save('sentiment.marshal')
Exemple #19
0
import sys
import pandas as pd  #加载pandas
from snownlp import sentiment  #加载情感分析模块
from snownlp import SnowNLP

text = pd.read_excel(u'D:/自然语言处理/川大相关微博内容.xlsx', header=0)  # load the text data
text0 = text.iloc[:, 0]  # first column: the comment texts
# Only real byte strings need decoding. Calling .decode() on text that is
# already decoded raises on Python 3.
text1 = [i.decode('utf-8') if isinstance(i, bytes) else i for i in text0]

# Train on the corpus; change the paths to match the local installation.
sentiment.train('D:/Anaconda3/Lib/site-packages/snownlp/sentiment/neg.txt',
                'D:/Anaconda3/Lib/site-packages/snownlp/sentiment/pos.txt')
# Save the trained model so an unchanged corpus need not be retrained.
sentiment.save('D:/pyscript/sentiment.marshal')

senti = [SnowNLP(i).sentiments for i in text1]  # score every comment

# Scores of 0.6 or more are labelled 1, everything else -1.
newsenti = [1 if score >= 0.6 else -1 for score in senti]
# Add the predicted labels as a new column. Column 0 is the comment text and
# column 1 the actual label.
text['predict'] = newsenti
# Count the rows where the predicted label equals the actual label.
counts = int((text['predict'] == text.iloc[:, 1]).sum())
# Apply the format with %. Passing the value as a second print() argument
# printed the raw "%f" placeholder instead of the accuracy.
print("准确率为:%f" % (float(counts) / float(len(text))))
def train(path):
    """Train on the negative and positive review CSVs under ``path`` and save the model.

    :param path: directory containing ``差评.csv`` (negative) and
        ``好评.csv`` (positive).
    """
    negative_csv = f'{path}/差评.csv'
    positive_csv = f'{path}/好评.csv'
    sentiment.train(negative_csv, positive_csv)
    sentiment.save('./sentiment.marshal')
Exemple #21
0
def train():
    """Train on ``D:\\neg.txt`` / ``D:\\pos.txt`` and save ``sentiment.marshal``."""
    neg_corpus = 'D:\\neg.txt'
    pos_corpus = 'D:\\pos.txt'
    sentiment.train(neg_corpus, pos_corpus)
    sentiment.save('sentiment.marshal')
Exemple #22
0
def train_material():
    """Train on ``neg.txt`` / ``pos.txt`` in the working directory and save the model."""
    corpora = ('neg.txt', 'pos.txt')  # negative corpus first, then positive
    sentiment.train(*corpora)
    sentiment.save('sentiment.marshal')
#-*-coding:utf-8-*-
from snownlp import sentiment
# Negative corpus first, then positive. Both are plain-text files with one
# sample per line and must be saved as UTF-8.
sentiment.train('neg.txt', 'pos.txt')
# Write the trained model file.
sentiment.save('my_sentiment.marshal')

#训练好后把生成的文件放到下面文件夹里
#D:\Python2.7\Lib\site-packages\snownlp\sentiment
#然后修改D:\Python2.7\Lib\site-packages\snownlp\sentiment\__init__.py里的data_path

#mac里:
#/Library/Python/2.7/site-packages/snownlp/sentiment
Exemple #24
0
def train_my_data():
    """Retrain on the corpora under ``Data/Output`` and save the model there."""
    neg_corpus = r'Data/Output/neg.txt'
    pos_corpus = r'Data/Output/pos.txt'
    # Retrain the model.
    sentiment.train(neg_corpus, pos_corpus)
    # Save the newly trained model.
    sentiment.save(r'Data/Output/sentiment.marshal')
Exemple #25
0
def TrainAndSave(negfile, posfile):
    """Train on the given corpora and save the model as ``sentiment.marshal``.

    :param negfile: path to the negative corpus.
    :param posfile: path to the positive corpus.
    """
    corpora = (negfile, posfile)
    sentiment.train(*corpora)
    sentiment.save('sentiment.marshal')
def train():
    """Train on the installed corpus and save the model as ``sentiment2.marshal``."""
    neg_corpus = 'F:/Anaconda/Lib/site-packages/snownlp/sentiment/neg.txt'
    pos_corpus = 'F:/Anaconda/Lib/site-packages/snownlp/sentiment/pos.txt'
    sentiment.train(neg_corpus, pos_corpus)
    sentiment.save('F:/Anaconda/Lib/site-packages/snownlp/sentiment/sentiment2.marshal')
def train(neg_file, pos_file, to):
    """Train on the given corpora and save the model.

    :param neg_file: path to the negative corpus.
    :param pos_file: path to the positive corpus.
    :param to: path the model is saved to.
    """
    corpora = (neg_file, pos_file)
    sentiment.train(*corpora)
    sentiment.save(to)
Exemple #28
0
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
本文件使用data文件夹下的neg.txt和pos.txt情感语料库对snownlp包的情感分析进行训练,提高情感分析准确率
由于未找到合适的语料库资源>_<,人为分类语料库信息效率极低(我尝试过...), 故我直接使用snownlp默认的购买商品评价语料库...
此处后续后提升
"""
from snownlp import sentiment
import os


# Directory containing this file.
path = os.path.abspath(os.path.dirname(__file__))
# The former ``price_data_path`` assignment was removed. It referenced
# ``price_file_name``, which is not defined in this file, so it raised
# NameError before training could start. Its result was never used.

sentiment.train('neg.txt', 'pos.txt')
# NOTE(review): path[:-5] drops the last five characters of the directory
# path, presumably to step up out of a 5-character folder name. Confirm.
sentiment.save(path[:-5]+'/snownlp/sentiment/gold_sentiment.marshal')

# 训练完后在/snownlp/sentiment文件夹下__init__.py修改指定的marshal文件才可使用
Exemple #29
0
def train_model():
    """Train on the corpora under ``/home/hadoopnew`` and save ``sentiment.marshal_knee``.

    The original note says the training data is not published on GitHub and
    must be copied from the lab machine.
    """
    from snownlp import sentiment

    neg_corpus = '/home/hadoopnew/neg.txt'
    pos_corpus = '/home/hadoopnew/pos.txt'
    sentiment.train(neg_corpus, pos_corpus)
    sentiment.save('sentiment.marshal_knee')
Exemple #30
0
def snow_train_disposable(file_post=''):
    """Train on the ``data`` corpora tagged with ``file_post`` and save the model.

    :param file_post: suffix inserted into the corpus file names and
        appended to the saved model's name.
    """
    neg_corpus = 'data\\neg_' + file_post + '.txt'
    pos_corpus = 'data\\pos_' + file_post + '.txt'
    sentiment.train(neg_file=neg_corpus, pos_file=pos_corpus)
    sentiment.save('data\\sentiment.marshal_' + file_post)
Exemple #31
0
        clean_txt=re.sub(r'[A-Za-z\d]*','',clean_txt)
        # 对文本进行分词
        clean_list=clean_txt.split('\n')
        return clean_list




if __name__ == '__main__':

    #####################################################################
    print("正在加载训练集...")
    # sentiment.train expects the NEGATIVE corpus first and the POSITIVE
    # corpus second. The arguments used to be passed the other way round,
    # which trains the model with inverted labels.
    sentiment.train('./../../Resources/sentiment_folders/hotel/neg.txt', './../../Resources/sentiment_folders/hotel/positive.txt')  # adjust paths
    sentiment.save('sentiment.marshal')

    #############################################
    # # JSON file used for testing
    # filename='./../../Resources/jsonfiles/ChnSentiCorp.json' # adjust path
    # type_list,content_list=file_op.readfile(filename)
    ###############################################
    # Use the Zhihu comments as the test set.
    comment_file='./../../Resources/CutWordPath/sentiment_comment.txt'
    content_list=Read_comment_file(comment_file)
    ###################################################

    # Run the SnowNLP sentiment analysis.
    sentences,sentences_score=sentiment_snownlp(content_list)

    # Plot; returns the sentiment-score dictionary and list.
Exemple #32
0
# coding: utf-8
from snownlp import SnowNLP,sentiment
import os.path
# Corpus and model files live under model/sentiment next to this file.
base = os.path.dirname(__file__)
neg = os.path.join(base, 'model/sentiment/neg.txt')
pos = os.path.join(base, 'model/sentiment/pos.txt')
tagdest = os.path.join(base, 'model/sentiment/sentiment.marshal')

# Train on (negative, positive), then write the model to tagdest.
sentiment.train(neg, pos)
sentiment.save(tagdest)