Example #1
0
    def generateCourseBaseCode(self, course):
        """
        Give ``course`` a base-course code and index.

        A base course whose name is already registered in
        ``self.course_base_dict`` is reused and becomes the current base
        course.  Otherwise ``self.max_index`` is advanced, a code of the
        form ``open.bc.<index>`` is generated, and a new CourseBase record
        is registered under the course's base name.

        :param course: object exposing ``coursebase_name``; its
            ``coursebase_code`` and ``coursebase_index`` are overwritten
        :return: None
        """
        registry = self.course_base_dict
        if course.coursebase_name in registry:
            known = registry.get(course.coursebase_name)
            self.current_base_course = known
            course.coursebase_code = known.coursebase_code
            course.coursebase_index = known.coursebase_index
            return

        # Start from 1 when nothing was seen before, otherwise from the
        # index of the most recently used base course.
        previous = self.current_base_course
        candidate = 1 if previous is None else previous.coursebase_index

        if self.max_index < candidate:
            self.max_index = candidate
        else:
            self.max_index += 1

        course.coursebase_code = 'open.bc.' + str(self.max_index)
        course.coursebase_index = self.max_index

        # Register the newly created base course.
        created = CourseInfomation.CourseBase()
        created.coursebase_code = course.coursebase_code
        created.coursebase_name = course.coursebase_name
        created.coursebase_index = course.coursebase_index
        registry[created.coursebase_name] = created

        self.current_base_course = created
    def createCypherFile(self, course):
        """
        Generate the cypher statements for ``course`` and save them to a file.

        Nothing is done when ``course`` has no entry in
        ``self.exam_info.examquestion_dict``.  Otherwise the statements
        returned by ``self.generateCypher`` are written, one per line, to
        the ``cypher_txt_filepath`` of a CourseFilepath initialised for the
        course under ``self.rootpath``.

        :param course: key into ``self.exam_info.examquestion_dict``; must
            expose ``NewCourseName`` for the progress message
        :return: the list of cypher statements, or None when the course
            has no exam questions
        """
        question_dict = self.exam_info.examquestion_dict
        if course not in question_dict:
            return

        exam_question_list = question_dict.get(course)
        # Link knowledge points and exam questions.
        cypherlist = self.generateCypher(course, exam_question_list)

        # Persist the statements.
        course_path_info = CourseInfomation.CourseFilepath()
        course_path_info.courseware_source_directory = self.rootpath
        course_path_info.initByCourse(course)
        # The context manager closes the file even if a write fails.
        with open(course_path_info.cypher_txt_filepath, 'w') as fout:
            for item in cypherlist:
                fout.write(item)
                fout.write('\n')
        print('cypher文件:{}已生成'.format(course.NewCourseName))
        return cypherlist
Example #3
0
    def associateFlow(self, course_list):
        """
        Run the association pipeline, one step after another.

        For every entry of ``course_list`` a CourseFilepath is configured
        (``entry[1]`` is appended to ``self.c_rootpath`` as the docx path,
        ``entry[0]`` is handed to ``initByCourse``) and collected on
        ``self.treefactory.course_filepath_list``.  Knowledge extraction,
        exam-question processing and question/knowledge association are
        then invoked in that order.

        :param course_list: iterable of indexable entries; presumably
            (course, docx file name) tuples -- verify against the caller
        :return: None
        """
        self.treefactory.course_filepath_list = []
        for entry in course_list:
            path_info = CourseInfomation.CourseFilepath()
            # Point the path object at the courseware source files.
            path_info.courseware_source_directory = self.srcrootpath
            docx_path = self.c_rootpath + '/' + entry[1]
            path_info.courseware_source_docx_filepath = docx_path
            path_info.initByCourse(entry[0])

            self.treefactory.course_filepath_list.append(path_info)

        print('正在转换docx到txt,并抽取知识树...')
        self.treefactory.extractKnowledge()
        print('知识树抽取完成。')

        # Step 3: process the exam-question sources.
        print('开始获取试题数据...')
        self.questionSourceFileProcess(self.treefactory.course_filepath_list)
        print('试题数据获取完成。')

        # Step 4: associate knowledge points with exam questions.
        print('开始关联试题与知识点...')
        self.questionAndKnowledge(self.treefactory.course_filepath_list)
        print('完成试题与知识点的关联。')
    def createCorpusFile(self, course_path_info):
        """
        Write the corpus file for the course carried by ``course_path_info``.

        Each exam question of the course contributes one line (the value of
        its ``getContentAndAnswer()``) to
        ``course_path_info.examquestion_source_txt_filepath``.  Nothing is
        written when the course has no entry in
        ``self.exam_info.examquestion_dict``.

        Only this single course is handled: the loop over every course that
        used to follow the unconditional ``return`` could never run and has
        been dropped.

        :param course_path_info: object exposing ``course`` and
            ``examquestion_source_txt_filepath``
        :return: None
        """
        course = course_path_info.course
        question_dict = self.exam_info.examquestion_dict
        if course not in question_dict:
            return

        file_name = course_path_info.examquestion_source_txt_filepath
        # The context manager closes the file even if a write fails.
        with open(file_name, 'w') as fout:
            for item in question_dict.get(course):
                fout.write(item.getContentAndAnswer())
                fout.write('\n')
        print('语料文件:{}已生成'.format(course.NewCourseName))
Example #5
0
 def __init__(self, course_source_filename):
     """
     Create the collaborators used by this object.

     :param course_source_filename: passed unchanged to
         CourseInfomation.CourseDictionary
     """
     # Course dictionary object.
     dictionary = CourseInfomation.CourseDictionary(course_source_filename)
     self.course_info = dictionary

     reader = ExcelReader.ExcelReader()
     self.excel_reader = reader
Example #6
0
    def statistics(self):
        """
        Aggregate ``self.course_score_list`` and print a summary.

        Two things are reported:

        * the question counts of the four score buckets summed over all
          courses, with the rates produced by ``CourseScore.compute()``;
        * how many courses have more than half of their questions in the
          two buckets above 50 points, and how many do not.

        The per-course decision is taken from that course's own bucket
        counts.  NOTE(review): this assumes the four ``score_scope_*_count``
        buckets together cover every question of a course -- confirm
        against CourseScore.

        The course-level ratios are skipped when the list is empty, which
        avoids a division by zero.

        :return: None
        """
        # Course-level distribution counters.
        n_coure_count_more50 = 0
        n_coure_score_less50 = 0

        # Bucket totals across all courses.
        n_coure_score = CourseInfomation.CourseScore()
        for course_score in self.course_score_list:
            n_coure_score.score_scope_more60_count += course_score.score_scope_more60_count
            n_coure_score.score_scope_between5060_count += course_score.score_scope_between5060_count
            n_coure_score.score_scope_between4050_count += course_score.score_scope_between4050_count
            n_coure_score.score_scope_less40_count += course_score.score_scope_less40_count

            # A course counts as "more50" when over 50% of its own
            # questions scored above 50.  Comparing 2*above with the total
            # keeps the test in integers.
            above50 = (course_score.score_scope_between5060_count +
                       course_score.score_scope_more60_count)
            course_total = (above50 +
                            course_score.score_scope_between4050_count +
                            course_score.score_scope_less40_count)
            if above50 * 2 > course_total:
                n_coure_count_more50 += 1
            else:
                n_coure_score_less50 += 1

        n_coure_score.compute()
        print('试题总数:{}'.format(n_coure_score.score_scope_total))

        # One summary line per score bucket, highest bucket first.
        bucket_templates = (
            ('more60', '比较靠谱数(60分以上):{}  ,比较靠谱占比:{}%'),
            ('between5060', '基本靠谱数(50-60分):{}  ,基本靠谱占比:{}%'),
            ('between4050', '不太靠谱数(40-50分):{}  ,不太靠谱占比:{}%'),
            ('less40', '不靠谱数(40分以下):{}  ,不靠谱占比:{}%'),
        )
        for bucket, template in bucket_templates:
            count = getattr(n_coure_score, 'score_scope_' + bucket + '_count')
            rate = getattr(n_coure_score, 'score_scope_' + bucket + '_rate')
            print(template.format(count, round(rate * 100, 2)))

        # Distribution of courses over the two groups.
        n_coure_count_total = n_coure_score_less50 + n_coure_count_more50
        if n_coure_count_total > 0:
            n_coure_score_less_rate = float(
                n_coure_score_less50) / n_coure_count_total
            n_coure_count_more_rate = float(
                n_coure_count_more50) / n_coure_count_total
            print('50%以上的试题得分大于50分的课程数量:{}  占比:{}'.format(
                n_coure_count_more50, n_coure_count_more_rate))
            print('50%以上的试题得分小于50分的课程数量:{}  占比:{}'.format(
                n_coure_score_less50, n_coure_score_less_rate))
Example #7
0
 def loadBaseCourse(self, base_course_file):
     """
     Load base-course records from ``base_course_file``.

     Every line, with its newline characters stripped, is parsed by
     ``CourseBase.initByString`` and appended to
     ``self.course_base_list``; the last record read becomes
     ``self.current_base_course``.  Nothing happens when
     ``FilePath.fileExist`` reports the file as missing.

     :param base_course_file: path of the text file to read
     :return: None
     """
     if not FilePath.fileExist(base_course_file):
         return
     # The context manager closes the file, which the previous version
     # never did.
     with open(base_course_file, 'r') as f_input:
         for line in f_input:
             cb = CourseInfomation.CourseBase()
             cb.initByString(line.strip('\n'))
             self.course_base_list.append(cb)
             self.current_base_course = cb
Example #8
0
    def __init__(self):
        """
        Set up the local state: course dictionary, Excel reader, the
        chapter/section heading pattern and the output location.
        """
        self.courseinfo = CourseInfomation.CourseDictionary()
        self.excelreader = ExcelReader.ExcelReader()

        # Pattern text for headings made of a numeral followed by a
        # chapter/section/lecture marker.  It contains no backslashes, so
        # the plain unicode literal has the same value as a raw one.
        heading_regex = u'(第[一二三四五六七八九零十百千万亿0123456789]+[章节讲])'
        self.re_num_0 = heading_regex
        self.pattern = re.compile(self.re_num_0)

        self.outputfile = u'./../data/course-knowledge-tgt/'
Example #9
0
    def __init__(self, course_source_filename):
        """
        Create the collaborators and the empty base-course bookkeeping.

        :param course_source_filename: passed unchanged to
            CourseInfomation.CourseDictionary
        """
        # Course dictionary object.
        dictionary = CourseInfomation.CourseDictionary(course_source_filename)
        self.course_info = dictionary
        self.excel_reader = ExcelReader.ExcelReader()

        # Base-course state: nothing registered, no current base course.
        self.max_index = 0
        self.current_base_course = None
        self.course_base_list = []
        self.course_base_dict = {}
 def loadProcessedCourse(self, rootpath):
     """
     Reload course scores saved in ``<rootpath>/statistics-mid.txt``.

     Each line is parsed by ``CourseScore.initByString``.  The score is
     stored in ``self.course_processed_dict`` under the key
     ``"<school_code>-<course_code>"`` and appended to
     ``self.course_score_list``.  Nothing happens when
     ``FilePath.fileExist`` reports the file as missing.

     :param rootpath: directory expected to contain statistics-mid.txt
     :return: None
     """
     output_mid_filepath = '{}/statistics-mid.txt'.format(rootpath)
     if not FilePath.fileExist(output_mid_filepath):
         return
     # The context manager closes the file even if parsing a line fails.
     with open(output_mid_filepath, 'r') as fin:
         for one_course_str in fin:
             course_score = CourseInfomation.CourseScore()
             course_score.initByString(one_course_str)
             key = '{}-{}'.format(course_score.school_code,
                                  course_score.course_code)
             self.course_processed_dict[key] = course_score
             self.course_score_list.append(course_score)
Example #11
0
    def __init__(self):
        """
        Initialise the result containers, the course dictionary and the
        column layout.
        """
        # Cypher output state.
        self.cypherlist = []
        self.cypher = None
        self.result = []

        # Lookup tables, empty until filled elsewhere.
        self.question = {}
        self.knowledge = {}

        self.course = CourseInfomation.CourseDictionary()

        # Number of rows without a knowledge point (used when printing).
        self.no_kwg_row_count = 0
        # Set up the columns.
        self.__initColumn()
Example #12
0
 def createExcelFile(self):
     """
     Save the exam questions of every course to its own Excel file.

     For each course in ``self.exam_info.examquestion_dict`` a sheet named
     ``sheet1`` is built from ``QuestionInformation.column_head_list``
     followed by ``toList()`` of every question whose content does not
     start with ``<img``.  The sheet is written to the course's
     ``examquestion_source_xlsx_filepath``.

     :return: None
     """
     for course, row_list in self.exam_info.examquestion_dict.items():
         course_path_info = CourseInfomation.CourseFilepath()
         course_path_info.courseware_source_directory = self.rootpath
         course_path_info.initByCourse(course)

         # Header row first, then one row per question; questions whose
         # content starts with an <img tag are left out.
         column_data_list = [QuestionInformation.column_head_list]
         column_data_list.extend(
             exam_question.toList() for exam_question in row_list
             if not exam_question.content.startswith(u'<img'))

         ExcelWriter.writeExcelFile(
             course_path_info.examquestion_source_xlsx_filepath,
             {'sheet1': column_data_list})
         print('Excel文件:{}已生成'.format(course.NewCourseName))
Example #13
0
    def statistics(self, statistics_filepath):
        """
        Aggregate ``self.course_score_list`` and write a report.

        The report written to ``statistics_filepath`` contains, in order:
        the description of every course, the description of the summed
        totals, the descriptions of the "bad" courses (those where the
        50-60 rate plus the above-60 rate is not greater than 0.5), the
        unrecognised courses and the out-of-scope courses.  The
        course-level ratios are printed only when at least one course
        exists.

        :param statistics_filepath: path of the report file; overwritten
        :return: None
        """
        # Courses that fail the 50% criterion.
        bad_course_list = []
        # Course-level distribution counters.
        n_coure_count_more50 = 0
        n_coure_score_less50 = 0

        # The context manager closes the report even when a description
        # or a write raises part-way through.
        with open(statistics_filepath, 'w') as f_stat:
            # Bucket totals across all courses.
            n_coure_score = CourseInfomation.CourseScore()
            for course_score in self.course_score_list:
                n_coure_score.score_scope_more60_count += course_score.score_scope_more60_count
                n_coure_score.score_scope_between5060_count += course_score.score_scope_between5060_count
                n_coure_score.score_scope_between4050_count += course_score.score_scope_between4050_count
                n_coure_score.score_scope_less40_count += course_score.score_scope_less40_count

                f_stat.write('\n'.join(course_score.getDescription()))
                f_stat.write('\n\n')

                # "more50" when over 50% of the questions scored above 50.
                if (course_score.score_scope_between5060_rate +
                        course_score.score_scope_more60_rate) > 0.5:
                    n_coure_count_more50 += 1
                else:
                    n_coure_score_less50 += 1
                    bad_course_list.append(course_score)

            f_stat.write('所有课程的汇总统计:')
            f_stat.write('\n'.join(n_coure_score.getDescription()))
            f_stat.write('\n\n')

            # Distribution of courses over the two groups.
            n_coure_count_total = n_coure_score_less50 + n_coure_count_more50
            if n_coure_count_total > 0:
                n_coure_score_less_rate = float(
                    n_coure_score_less50) / n_coure_count_total
                n_coure_count_more_rate = float(
                    n_coure_count_more50) / n_coure_count_total
                print('50%以上的试题得分大于50分的课程数量:{}  占比:{}'.format(
                    n_coure_count_more50, n_coure_count_more_rate))
                print('50%以上的试题得分小于50分的课程数量:{}  占比:{}'.format(
                    n_coure_score_less50, n_coure_score_less_rate))

            # Save the bad-course information.
            print('bad course information.')
            for bad_course in bad_course_list:
                f_stat.write('\n'.join(bad_course.getDescription()))
                f_stat.write('\n')

            # Save the unrecognised and the out-of-scope courses.
            f_stat.write('\n\n')
            f_stat.write('未识别的课程:')
            f_stat.write('\n'.join(self.course_unrecongnized))
            f_stat.write('\n\n')
            f_stat.write('超出范围的课程:')
            f_stat.write('\n'.join(self.course_over_scope))
Example #14
0
            answer_content = answer_content + ', ' + str(question_row[4])
        if str(question_answer).__contains__('D'):
            answer_content = answer_content + ', ' + str(question_row[5])
        if str(question_answer).__contains__('E'):
            answer_content = answer_content + ', ' + str(question_row[6])

        answer_content = str(answer_content)
        if len(answer_content):
            answer_content = answer_content[1:]
        knowledge = ''
        if len(question_row) > 12:
            knowledge = str(question_row[12])
        content = '{}:: {} 答案:{}'.format(knowledge, question_content,
                                         answer_content)

        return content


if __name__ == '__main__':
    # Manual run: process one hard-coded exam-question workbook in test mode.
    er = ExamQuestionProcessor()
    er.isTest = True

    course_path_info = CourseInfomation.CourseFilepath()
    course_path_info.courseware_source_directory = er.rootpath
    xlsx_path = u'{}/q-xlsx/20181122-200plus.xlsx'.format(er.rootpath)
    course_path_info.examquestion_source_xlsx_filepath = xlsx_path

    course_info = CourseInfomation.CourseDictionary()
    er.setCourseInfo(course_info)
    er.courseExamQuestionGenerator(course_path_info)

    print('')
    def batchProcessAssociate1(self, dirname):
        """
        Run the automatic association for every courseware file of a batch.

        Files are listed from ``<data root>/<dirname>/c-docx``.  A file is
        skipped when its school+course key is not in
        ``self.school_course_scope_dict`` (recorded in
        ``self.course_over_scope``), when the course name contains
        ``英语``, when the matched course has an empty ``SchoolName``
        (recorded in ``self.course_unrecongnized``), or when its
        ``SchoolCode-CourseCode`` key is already in
        ``self.course_processed_dict``.  Every other file goes through
        ``self.associateFlow``.  Afterwards the statistics report and the
        combined bad-question workbook are written.

        :param dirname: sub-directory of the machine-knowledge data root
        :return: None
        """
        # Root directory of this batch; the courseware lives in c-docx.
        srcrootpath = './../../data/course-knowledge-machine/' + dirname
        c_rootpath = srcrootpath + '/c-docx'
        self.loadProcessedCourse(srcrootpath)

        count = 0
        for count, f in enumerate(self.get_filename_from_dir(c_rootpath), 1):
            self.coursename = os.path.splitext(f)[0]
            current_coursename = self.getCourseNameFromFileName(
                self.coursename)
            current_schoolname = self.getSchoolNameFromFileName(
                self.coursename)

            # Skip courses that are outside the configured scope.
            scope_key = current_schoolname + current_coursename
            if scope_key not in self.school_course_scope_dict:
                self.course_over_scope.append(f)
                continue

            # Courses whose name contains this word are not processed.
            if u'英语' in current_coursename:
                continue

            print('开始处理文件:{}'.format(f))
            course = self.school_course_scope_dict.get(scope_key)
            if len(course.SchoolName) == 0:
                self.course_unrecongnized.append(f)
                continue

            # Skip courses that were already processed.
            course_key = '{}-{}'.format(course.SchoolCode, course.CourseCode)
            if course_key in self.course_processed_dict:
                print('第{0}篇 课程:{1} 已处理过;'.format(count, f))
                continue

            # Describe where the courseware source files are.
            course_path_info = CourseInfomation.CourseFilepath()
            course_path_info.courseware_source_directory = srcrootpath
            course_path_info.courseware_source_docx_filepath = c_rootpath + '/' + f
            course_path_info.initByCourse(course)

            self.associateFlow(course_path_info)

            print('第{0}篇 课程:{1} 处理完成;'.format(count, f))

            # In test mode stop once more than four files were seen.
            if self.isTest and count > 4:
                break

        print('所有课程处理完毕,共处理:{0}篇'.format(count))

        # Summarise the results.
        print('开始统计结果。')
        statistics_filepath = '{}/statistics.txt'.format(srcrootpath)
        self.statistics(statistics_filepath)
        print('统计结果结束。')

        # Save the poorly associated questions.
        combine_bad_filepath = '{}/combine_bad.xls'.format(srcrootpath)
        self.combineBadExamquestion(combine_bad_filepath)
Example #16
0
                continue
            if word.startswith(u'概'):
                continue

            result_list.append(word)

        return result_list

    def outfile(self, filepath):
        """
        Write ``self.snd_level_catalog`` to ``filepath``.

        The entries are written with ``writelines``, so no line separators
        are added between them.

        :param filepath: path of the output file; overwritten
        :return: None
        """
        # The context manager closes the file even if writing fails.
        with open(filepath, 'w') as fout:
            fout.writelines(self.snd_level_catalog)


if __name__ == "__main__":

    # Manual run: train on the catalogue, predict, then dump the results.
    course_path_info = CourseInfomation.CourseFilepath()
    for attr_name, path_value in (
            ('courseware_source_txt_filepath',
             u'./../../data/course-base/本科专业目录-catalog.xlsx.txt'),
            ('vector_corpus_txt_filepath',
             u'./../../data/course-base/本科专业目录-catalog.corpus.txt'),
            ('vector_model_bin_filepath',
             u'./../../data/course-base/本科专业目录-catalog.model.bin'),
            ('correlation_txt_filepath',
             u'./../../data/course-base/本科专业目录-course-catalog.txt'),
    ):
        setattr(course_path_info, attr_name, path_value)
    sr = TextVector(course_path_info)

    sr.readCourseNameList()
    sr.train()
    sr.predication()
    sr.output_dict()

    filepath = u'./../../data/course-base/本科专业目录-course-catalog-tag.txt'
    sr.outfile(filepath)
    def predication(self):
        """
        Score every exam question against every knowledge point.

        For each question of each course in ``self.course_path_info_list``
        the question words are compared with the words of every entry in
        ``self.knowledge`` through ``self.doc_vec.pred_similarity``.  The
        results are sorted by descending score, fed to
        ``self.computeScore``, and appended in formatted form to
        ``self.outputcontentlist``.  Questions for which
        ``self.badExamquestionStatistics`` returns True are collected,
        with their three best results, in ``self.bad_examquestion_list``.
        Finally the bucket summary of ``self.course_score`` is appended to
        the output list and printed.

        :return: None
        """
        self.bad_examquestion_list = []
        self.course_score = CourseInfomation.CourseScore()
        self.course_score.initCourse(self.course_path_info_list[0].course)
        if self.examquestion_info is None:
            return

        qindex = 0
        for course_path_info in self.course_path_info_list:
            question_dict = self.examquestion_info.examquestion_dict
            if course_path_info.course not in question_dict:
                # NOTE(review): this aborts the whole run, summary
                # included, as soon as one course has no questions --
                # confirm that skipping the course is not what was meant.
                return

            for exam_question in question_dict.get(course_path_info.course):
                k = exam_question.knowledge_list
                q = exam_question.getContentAndAnswer()
                qindex = qindex + 1

                q_words = self.sentence.splitSentenceCanRepeat(q)
                # Find the key words of the question and amplify them.
                q_words = self.preprocessor.enlargeVipWords(q_words, q)
                if len(q_words) == 0:
                    continue

                # Compare against every knowledge point that has words.
                index = 0
                res_list = []
                for k_key in self.knowledge.keys():
                    k_tup = self.knowledge.get(k_key)
                    k_words = k_tup[0]
                    if len(k_words) == 0:
                        continue
                    score = self.doc_vec.pred_similarity(q_words, k_words)
                    res_list.append(
                        ResultInfo.ResultInfo(index, score, k_tup[2], k_key))
                    index += 1
                # Best score first.  The Python-2-only ``cmp`` keyword
                # was redundant (it was None) and is no longer passed.
                res_list.sort(key=lambda x: x.score, reverse=True)

                # Update the score statistics.
                self.computeScore(res_list)

                # Remember questions flagged as badly matched.
                if self.badExamquestionStatistics(res_list) == True:
                    self.bad_examquestion_list.append(
                        (exam_question, res_list[0:3]))

                # Format the results for output.
                reslist, wordlist = self.formatOutput(res_list, k)
                if len(reslist) > 0:
                    ns = '问题{0}:'.format(qindex) + q
                    self.outputcontentlist.append(ns + '\n')
                    ns = '电脑标识知识点:' + ';'.join(wordlist)
                    self.outputcontentlist.append(ns + '\n')
                    ns = '知识点评估指标:' + ';'.join(reslist)
                    self.outputcontentlist.append(ns + '\n')
                    ns = '老师标识知识点:'
                    self.outputcontentlist.append(ns + '\n')
                    self.outputcontentlist.append('\n')

        # Compute the totals and rates, then report them.
        self.course_score.compute()

        ns = '试题总数:{}'.format(self.course_score.score_scope_total)
        self.outputcontentlist.append(ns + '\n')
        print(ns)

        # One summary line per score bucket, highest bucket first.
        bucket_templates = (
            ('more60', '比较靠谱数(60分以上):{}  ,比较靠谱占比:{}%'),
            ('between5060', '基本靠谱数(50-60分):{}  ,基本靠谱占比:{}%'),
            ('between4050', '不太靠谱数(40-50分):{}  ,不太靠谱占比:{}%'),
            ('less40', '不靠谱数(40分以下):{}  ,不靠谱占比:{}%'),
        )
        for bucket, template in bucket_templates:
            count = getattr(self.course_score,
                            'score_scope_' + bucket + '_count')
            rate = getattr(self.course_score,
                           'score_scope_' + bucket + '_rate')
            ns = template.format(count, round(rate * 100, 2))
            self.outputcontentlist.append(ns + '\n')
            print(ns)
    def predication(self):
        """
        Score every question of the subject file against every knowledge
        point.

        The subject file (``self.input_subject_file``, falling back to
        ``self.doc_vec.train_input_subject_file``) holds one
        ``knowledge:question`` pair per line, split at the first ``:``.
        Each question is compared with every entry of ``self.knowledge``
        through ``self.doc_vec.pred_similarity``; the results are sorted
        by descending score, fed to ``self.computeScore`` and appended in
        formatted form to ``self.outputcontentlist``.  Finally the bucket
        summary of ``self.course_score`` is appended to the output list
        and printed.

        :return: None
        :raises ValueError: when a line contains no ``:``
        """
        if self.input_subject_file is None:
            self.input_subject_file = self.doc_vec.train_input_subject_file

        # Read everything up front; the context manager closes the file,
        # which the previous version never did.
        with open(self.input_subject_file, 'r') as question:
            ids_lines = question.readlines()

        qindex = 0
        self.course_score = CourseInfomation.CourseScore()

        for line in ids_lines:
            line = line.strip('\n')
            # Text before the first colon is the teacher-assigned
            # knowledge point, the rest is the question.
            colon_pos = line.index(':')
            k = line[0:colon_pos]
            q = line[colon_pos + 1:]
            qindex = qindex + 1

            q_words = self.sentence.splitSentenceCanRepeat(q)
            # Find the key words of the question and amplify them.
            q_words = self.preprocessor.enlargeVipWords(q_words, q)
            if len(q_words) == 0:
                continue

            # Compare against every knowledge point that has words.
            index = 0
            res_list = []
            for k_key in self.knowledge.keys():
                k_tup = self.knowledge.get(k_key)
                k_words = k_tup[0]
                if len(k_words) == 0:
                    continue
                score = self.doc_vec.pred_similarity(q_words, k_words)
                res_list.append(
                    ResultInfo.ResultInfo(index, score,
                                          k_tup[2] + ' ' + k_key))
                index += 1
            # Best score first.  The Python-2-only ``cmp`` keyword was
            # redundant (it was None) and is no longer passed.
            res_list.sort(key=lambda x: x.score, reverse=True)

            # Update the score statistics.
            self.computeScore(res_list)

            # Format the results for output.
            reslist, wordlist = self.formatOutput(res_list)
            if len(reslist) > 0:
                ns = '问题{0}:'.format(qindex) + q
                self.outputcontentlist.append(ns + '\n')
                ns = '电脑标识知识点:' + ';'.join(wordlist)
                self.outputcontentlist.append(ns + '\n')
                ns = '知识点评估指标:' + ';'.join(reslist)
                self.outputcontentlist.append(ns + '\n')
                ns = '老师标识知识点:' + k
                self.outputcontentlist.append(ns + '\n')
                self.outputcontentlist.append('\n')

        # Compute the totals and rates, then report them.
        self.course_score.compute()

        ns = '试题总数:{}'.format(self.course_score.score_scope_total)
        self.outputcontentlist.append(ns + '\n')
        print(ns)

        # One summary line per score bucket, highest bucket first.
        bucket_templates = (
            ('more60', '比较靠谱数(60分以上):{}  ,比较靠谱占比:{}%'),
            ('between5060', '基本靠谱数(50-60分):{}  ,基本靠谱占比:{}%'),
            ('between4050', '不太靠谱数(40-50分):{}  ,不太靠谱占比:{}%'),
            ('less40', '不靠谱数(40分以下):{}  ,不靠谱占比:{}%'),
        )
        for bucket, template in bucket_templates:
            count = getattr(self.course_score,
                            'score_scope_' + bucket + '_count')
            rate = getattr(self.course_score,
                           'score_scope_' + bucket + '_rate')
            ns = template.format(count, round(rate * 100, 2))
            self.outputcontentlist.append(ns + '\n')
            print(ns)
Example #19
0
        k_list = []
        f = open(self.ngram.outputfile)
        for k in f:
            k_list.append(k)

        return k_list


if __name__ == "__main__":

    # Manual run: exercise two helpers, then extract knowledge from a
    # hard-coded text courseware file.
    pusher = TreeFactory()

    sen = pusher.postProcessSentence(u'“诗史”')
    sen = u'协议出让33.4%表 2.招标出让22%图3.折扣(discount为'
    pattern = re.compile(pusher.re_nouse_tag_percent)
    res = pattern.findall(sen)

    pusher.outputfile = u'./../data/course-knowledge-tgt/抽取模板.txt'
    pusher.inputfile = u'D:/奥鹏/学生服务中心标注/文科课程电子辅导资料-docx/抽取模板.docx'

    # Describe the text source and where the extracted knowledge goes.
    course_filepath = CourseInfomation.CourseFilepath()
    course_filepath.sourse_filetype = course_filepath.type_text
    course_filepath.courseware_source_txt_filepath = u'./../../data/other/中级财务会计.txt'
    course_filepath.courseware_knowledge_txt_filepath = u'./../../data/other/中级财务会计-kwg.txt'

    course = CourseInfomation.Course()
    course.CourseCode = '1000'
    course_filepath.course = course

    pusher.course_filepath = course_filepath
    pusher.extractKnowledge()
    print('over.')