コード例 #1
0
def test_neutral(to_stdout=True):
    """
    Report borderline classifications found in the neutral corpus.

    Per the original note, "neutral" refers to text written by the author.
    Every line of '../data/clean_neutral.txt' (UTF-8) is stripped and scored
    with the impurity classifier; lines whose probability lies strictly
    between 0.2 and 0.8 are reported as ``<line>,<label>,<probability>``,
    where label is 1 when the probability is above 0.5 and 0 otherwise.

    :param to_stdout: when true, print each row (GBK-encoded) and wait for
        the user to press enter; otherwise write the rows to
        '../data/result.csv' (GBK, unencodable characters dropped).
    :return: None
    """
    print('test neutral')

    sentiment.load('../data/impurity_classifier')
    result_file = None
    if not to_stdout:
        result_file = codecs.open('../data/result.csv', 'w', encoding='gbk', errors='ignore')

    try:
        with codecs.open('../data/clean_neutral.txt', encoding='utf-8') as neutral_file:
            for line in neutral_file:
                line = line.strip()
                prob = sentiment.classify(line)
                # Only uncertain predictions are worth a manual look.
                if not 0.2 < prob < 0.8:
                    continue
                row = line + ',' + str(1 if prob > 0.5 else 0) + ',' + str(prob) + cur_linesep
                if to_stdout:
                    # Drop characters GBK cannot represent, matching the
                    # errors='ignore' policy of the CSV writer, so a single
                    # odd character does not abort the review session.
                    print(row.encode('gbk', 'ignore'))
                    raw_input('press enter to continue')
                else:
                    result_file.write(row)
    finally:
        # Close the CSV even when reading or classification raises.
        if result_file is not None:
            result_file.close()
コード例 #2
0
def classify(cases):
    """
    Classify every sentence in ``cases`` and print one result row each.

    Each sentence is first passed through ``clean_impurity``. The printed row
    is ``<cleaned sentence>,<label>,<probability>``, where label is 1 when
    the probability is above 0.5 and 0 otherwise. Rows are GBK-encoded before
    printing; characters GBK cannot represent are dropped.

    :param cases: iterable of sentences to classify
    :return: None
    """
    # Load the model once up front rather than re-reading it per sentence.
    # NOTE(review): assumes clean_impurity() does not load a different
    # model in between — confirm against its definition.
    sentiment.load('../data/impurity_classifier')
    for case in cases:
        case = clean_impurity(case)
        prob = sentiment.classify(case)
        label = 1 if prob > 0.5 else 0
        row = case + ',' + str(label) + ',' + str(prob) + cur_linesep
        print(row.encode('gbk', 'ignore'))
コード例 #3
0
def test_sentiment():
    print 'test model'

    sentiment.load('../data/train_impurity_classifier')

    print 'test_negative'
    # with codecs.open('../data/test_negative.txt', encoding='utf-8') as negative_file:
    #     for line in negative_file:
    #         if sentiment.classify(line) > 0.1:
    #             print line,

    raw_input('press enter to continue')
    print 'test_positive'
    with codecs.open('../data/test_positive.txt', encoding='utf-8') as positive_file:
        for line in positive_file:
            if sentiment.classify(line) < 0.5:
                print line,
コード例 #4
0
ファイル: discover_feature.py プロジェクト: s19293949/-
"""
Created on Wed Oct 10 10:47:24 2018

@author: Administrator
"""
import re
import pandas as pd
import jieba
import jieba.analyse
import jieba.posseg as pseg
from gensim import corpora, models, similarities
from snownlp import SnowNLP
from snownlp import sentiment

# Load the snownlp sentiment model at import time from an absolute path
# inside a local Anaconda package directory.
# NOTE(review): the path is machine-specific (D: drive) — confirm before
# running this module on another machine.
sentiment.load(
    'D:\\anaconda\\anaconda\\pkgs\\snownlp-0.12.3\\snownlp\\sentiment\\sentiment.marshal'
)
# Register a custom jieba user dictionary (jbj.txt), also from a hard-coded
# local path.
jieba.load_userdict('D:\\anaconda\\anaconda\\pkgs\\jieba-0.39\\jieba\\jbj.txt')


#发现某类特征
def discover_feature(data, *text):
    key_list = []
    for key in data:
        #句子根据标点符号分句
        keys = re.split('[,~。!?、,. ]', key)
        for i in keys:
            for keyword in text:
                if keyword in i:
                    try:
                        #re匹配关键词之后的字段
コード例 #5
0
    0:"nm 给分", # 课程给分情况
    1:"rr 课程", # 某位老师开设的课程
    2:"nm 类型", # 某门课的类型
    3:"rr sst 课程",    # 某位老师开设的某种类型的课程
    4:"ut 课程", # 某个学院开设了什么课程
    5:"ut sst 课程", #某个学院开设的某种类型的课程
    6:"rr rr 课程",# 教师a与教师b共同上的课
    7:"rr 课程数量",# 某位老师的授课数量
    8:"ut sst 给分好课程", #某个学院开设的给分好的课程
    9:"sst 给分好的课", # 某个类型里给分不错的课
}

# Load the pretrained sentiment classification model at import time.
# 'sentiment.marshal' is looked up in the directory containing this file,
# so the path does not depend on the current working directory.

data_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),'sentiment.marshal')
sentiment.load(data_path)

class QuestionTemplate():
    def __init__(self):
        self.q_template_dict = {
            0:self.get_course_rating,
            1:self.get_teacher_courses,
            2:self.get_course_type,
            3:self.get_teacher_type_courses,
            4:self.get_school_courses,
            5:self.get_school_type_courses,
            6:self.get_course_of_2_teacher,
            7:self.get_teacher_course_num,
            8:self.get_school_good_courses,
            9:self.get_type_good_courses,
        }
コード例 #6
0
from basic.NovelStructure import *
from public.BasicStringMethod import *
from novel.cluster.NovelCleanModule import *
from novel.chapter.ChapterHtmlFilter import *
import logging
import re

debug = False
cur_delimiter = str(chr(1))  # field delimiter for stored files (character code 1)

# Characters treated as numerals: ASCII digits plus the Chinese numerals for
# zero through nine, ten, hundred and thousand.
number_char_list = [
    u'0', u'1', u'2', u'3', u'4', u'5', u'6', u'7', u'8', u'9',
    u'零', u'一', u'二', u'三', u'四', u'五', u'六', u'七', u'八', u'九', u'十', u'百', u'千'
]

# Load the impurity classifier model at import time.
# NOTE(review): the path is relative — presumably resolved against the
# current working directory; confirm how this module is launched.
sentiment.load('data/impurity_classifier')

def number_char_format(raw_chapter_title):
    """
    将章节标题中的连续数字用0代替,便于进行比较
    """
    fmt_chapter_title = u''
    flag = True
    for char in raw_chapter_title:
        if char not in number_char_list:
            fmt_chapter_title += char
            flag = True
        else:
            if flag:
                fmt_chapter_title += u'0'
            flag = False