def predication(self):
    """Rank every catalog entry against each course name and keep the top three.

    For each name in ``self.course_name_list``:

    * names without an entry in ``self.sentence_words_dict`` are skipped;
    * names whose word list is empty are appended to
      ``self.course_catalog_unknow_list``;
    * otherwise every entry of ``self.catalog_code_dict`` is scored with
      ``self.pred_similarity`` and wrapped in a ``ResultInfo``.  The three
      highest-scoring results are stored under the course name in
      ``self.course_catalogs_good_dict`` when the best score is above 0.45,
      and in ``self.course_catalogs_bad_dict`` otherwise.

    If the catalog yields no candidates at all, the course is recorded in
    ``self.course_catalog_unknow_list`` (previously this raised
    ``IndexError`` when reading the best candidate).

    :return: None; results are written to the instance containers above.
    """
    good_score_threshold = 0.45
    top_candidate_count = 3

    for course_name in self.course_name_list:
        if course_name not in self.sentence_words_dict:
            continue
        course_words = self.sentence_words_dict[course_name]
        if len(course_words) == 0:
            self.course_catalog_unknow_list.append(course_name)
            continue

        # Score the course against every catalog entry.  Each catalog tuple
        # carries the catalog code at [0] and its word list at [2].
        candidates = []
        for position, (catalog_name, catalog_tuple) in enumerate(
                self.catalog_code_dict.items()):
            score = self.pred_similarity(course_words, catalog_tuple[2])
            candidates.append(ResultInfo.ResultInfo(
                position, score, catalog_tuple[0], catalog_name))

        if not candidates:
            # Nothing to rank against, so the catalog stays unknown.
            self.course_catalog_unknow_list.append(course_name)
            continue

        # Highest score first.  No ``cmp`` argument: it is redundant on
        # Python 2 and rejected on Python 3.
        candidates.sort(key=lambda res: res.score, reverse=True)
        best_candidates = candidates[:top_candidate_count]

        if best_candidates[0].score > good_score_threshold:
            self.course_catalogs_good_dict[course_name] = best_candidates
        else:
            self.course_catalogs_bad_dict[course_name] = best_candidates
def getKnowledgeByName(self, name, k):
    """Look up a knowledge entry by its exact name.

    The entry is taken from ``self.knowledgeByName``.  It only counts as a
    hit when its word list (element ``[0]`` of the stored tuple) is
    non-empty; in that case a ``ResultInfo`` with index 0, score 1.0,
    element ``[2]`` of the tuple and the name is appended to ``k``.

    :param name: knowledge name to look up; ``None`` never matches.
    :param k: list that receives the ``ResultInfo`` on a hit (mutated in
        place).
    :return: True when an entry was appended to ``k``, otherwise False.
    """
    knowledge_by_name = self.knowledgeByName
    # Direct membership test instead of scanning every key for equality.
    if name is None or name not in knowledge_by_name:
        return False

    k_tup = knowledge_by_name[name]
    if len(k_tup[0]) == 0:
        # An entry without words is treated as "not found".
        return False

    k.append(ResultInfo.ResultInfo(0, 1.0, k_tup[2], name))
    return True
def predication(self):
    """Match the exam questions of every configured course to knowledge entries.

    Steps, as performed by the code:

    1. Reset ``self.bad_examquestion_list`` and create a fresh
       ``CourseScore`` in ``self.course_score``, initialised with the course
       of the first element of ``self.course_path_info_list``.
    2. For each course path, fetch its questions from
       ``self.examquestion_info.examquestion_dict``.  Each question text
       (``getContentAndAnswer()``) is split into words, passed through
       ``self.preprocessor.enlargeVipWords`` and scored against every entry
       of ``self.knowledge`` that has a non-empty word list, using
       ``self.doc_vec.pred_similarity``.
    3. The results, sorted by descending score, are handed to
       ``self.computeScore``; when ``self.badExamquestionStatistics``
       returns True the question and its top three results are stored in
       ``self.bad_examquestion_list``.
    4. The output of ``self.formatOutput`` is appended to
       ``self.outputcontentlist`` as text lines.
    5. After all courses, ``self.course_score.compute()`` is called and the
       total and the four score-band counts/percentages are appended to
       ``self.outputcontentlist`` and printed.

    Returns early (without the summary) when ``self.examquestion_info`` is
    None or when a course has no entry in the question dictionary.

    :return: None.
    """
    self.bad_examquestion_list = []
    self.course_score = CourseInfomation.CourseScore()
    self.course_score.initCourse(self.course_path_info_list[0].course)

    if self.examquestion_info is None:
        return

    qindex = 0
    for course_path_info in self.course_path_info_list:
        examquestion_dict = self.examquestion_info.examquestion_dict
        if course_path_info.course not in examquestion_dict:
            # NOTE(review): this aborts the whole run, so later courses and
            # the summary are skipped.  Behaviour kept from the original;
            # confirm whether skipping only this course was intended.
            return

        for exam_question in examquestion_dict.get(course_path_info.course):
            k = exam_question.knowledge_list
            q = exam_question.getContentAndAnswer()
            qindex += 1

            q_words = self.sentence.splitSentenceCanRepeat(q)
            # Find the key words of the question and give them more weight.
            q_words = self.preprocessor.enlargeVipWords(q_words, q)
            if len(q_words) == 0:
                continue

            # Score the question against every knowledge entry that has
            # words; the running index only counts entries actually scored.
            res_list = []
            for k_key, k_tup in self.knowledge.items():
                k_words = k_tup[0]
                if len(k_words) == 0:
                    continue
                score = self.doc_vec.pred_similarity(q_words, k_words)
                res_list.append(ResultInfo.ResultInfo(
                    len(res_list), score, k_tup[2], k_key))

            # Highest score first.  No ``cmp`` argument: it is redundant on
            # Python 2 and rejected on Python 3.
            res_list.sort(key=lambda res: res.score, reverse=True)

            # Accumulate the score statistics.
            self.computeScore(res_list)

            # Keep low-scoring questions together with their top three
            # results.  The explicit ``== True`` is preserved because the
            # helper's return type is not visible here.
            if self.badExamquestionStatistics(res_list) == True:
                self.bad_examquestion_list.append(
                    (exam_question, res_list[0:3]))

            reslist, wordlist = self.formatOutput(res_list, k)
            if len(reslist) > 0:
                ns = '问题{0}:'.format(qindex) + q
                self.outputcontentlist.append(ns + '\n')
                ns = '电脑标识知识点:' + ';'.join(wordlist)
                self.outputcontentlist.append(ns + '\n')
                ns = '知识点评估指标:' + ';'.join(reslist)
                self.outputcontentlist.append(ns + '\n')
                # The teacher-label line is emitted without a value here.
                ns = '老师标识知识点:'
                self.outputcontentlist.append(ns + '\n')
                self.outputcontentlist.append('\n')

    # Final statistics: total plus one line per score band.
    course_score = self.course_score
    course_score.compute()
    summary_lines = [
        '试题总数:{}'.format(course_score.score_scope_total),
        '比较靠谱数(60分以上):{} ,比较靠谱占比:{}%'.format(
            course_score.score_scope_more60_count,
            round(course_score.score_scope_more60_rate * 100, 2)),
        '基本靠谱数(50-60分):{} ,基本靠谱占比:{}%'.format(
            course_score.score_scope_between5060_count,
            round(course_score.score_scope_between5060_rate * 100, 2)),
        '不太靠谱数(40-50分):{} ,不太靠谱占比:{}%'.format(
            course_score.score_scope_between4050_count,
            round(course_score.score_scope_between4050_rate * 100, 2)),
        '不靠谱数(40分以下):{} ,不靠谱占比:{}%'.format(
            course_score.score_scope_less40_count,
            round(course_score.score_scope_less40_rate * 100, 2)),
    ]
    for ns in summary_lines:
        self.outputcontentlist.append(ns + '\n')
        # Parenthesised single-argument print behaves the same on
        # Python 2 and Python 3.
        print(ns)
def predication(self):
    """Match every question in the subject file to knowledge entries.

    The file path is ``self.input_subject_file``; when that is None it is
    first set to ``self.doc_vec.train_input_subject_file``.  Each line is
    split at the first ``':'``: the text before it is kept as the label
    ``k`` and the text after it is the question ``q``.  Lines without a
    ``':'`` (for example blank lines) are skipped instead of raising
    ``ValueError``.

    Each question is split into words, passed through
    ``self.preprocessor.enlargeVipWords`` and scored against every entry of
    ``self.knowledge`` that has a non-empty word list, using
    ``self.doc_vec.pred_similarity``.  The results, sorted by descending
    score, are handed to ``self.computeScore`` and ``self.formatOutput``;
    the formatted text is appended to ``self.outputcontentlist``.

    Finally ``self.course_score.compute()`` is called and the total and the
    four score-band counts/percentages are appended to
    ``self.outputcontentlist`` and printed.

    :return: None.
    """
    if self.input_subject_file is None:
        self.input_subject_file = self.doc_vec.train_input_subject_file

    # Context manager so the file handle is closed even if reading fails.
    with open(self.input_subject_file, 'r') as question_file:
        ids_lines = question_file.readlines()

    qindex = 0
    self.course_score = CourseInfomation.CourseScore()

    for line in ids_lines:
        line = line.strip('\n')
        k, separator, q = line.partition(':')
        if not separator:
            # No label/question separator on this line: nothing to match.
            continue
        qindex += 1

        q_words = self.sentence.splitSentenceCanRepeat(q)
        # Find the key words of the question and give them more weight.
        q_words = self.preprocessor.enlargeVipWords(q_words, q)
        if len(q_words) == 0:
            continue

        # Score the question against every knowledge entry that has words;
        # the running index only counts entries actually scored.
        res_list = []
        for k_key, k_tup in self.knowledge.items():
            k_words = k_tup[0]
            if len(k_words) == 0:
                continue
            score = self.doc_vec.pred_similarity(q_words, k_words)
            res_list.append(ResultInfo.ResultInfo(
                len(res_list), score, k_tup[2] + ' ' + k_key))

        # Highest score first.  No ``cmp`` argument: it is redundant on
        # Python 2 and rejected on Python 3.
        res_list.sort(key=lambda res: res.score, reverse=True)

        # Accumulate the score statistics.
        self.computeScore(res_list)

        reslist, wordlist = self.formatOutput(res_list)
        if len(reslist) > 0:
            ns = '问题{0}:'.format(qindex) + q
            self.outputcontentlist.append(ns + '\n')
            ns = '电脑标识知识点:' + ';'.join(wordlist)
            self.outputcontentlist.append(ns + '\n')
            ns = '知识点评估指标:' + ';'.join(reslist)
            self.outputcontentlist.append(ns + '\n')
            ns = '老师标识知识点:' + k
            self.outputcontentlist.append(ns + '\n')
            self.outputcontentlist.append('\n')

    # Final statistics: total plus one line per score band.
    course_score = self.course_score
    course_score.compute()
    summary_lines = [
        '试题总数:{}'.format(course_score.score_scope_total),
        '比较靠谱数(60分以上):{} ,比较靠谱占比:{}%'.format(
            course_score.score_scope_more60_count,
            round(course_score.score_scope_more60_rate * 100, 2)),
        '基本靠谱数(50-60分):{} ,基本靠谱占比:{}%'.format(
            course_score.score_scope_between5060_count,
            round(course_score.score_scope_between5060_rate * 100, 2)),
        '不太靠谱数(40-50分):{} ,不太靠谱占比:{}%'.format(
            course_score.score_scope_between4050_count,
            round(course_score.score_scope_between4050_rate * 100, 2)),
        '不靠谱数(40分以下):{} ,不靠谱占比:{}%'.format(
            course_score.score_scope_less40_count,
            round(course_score.score_scope_less40_rate * 100, 2)),
    ]
    for ns in summary_lines:
        self.outputcontentlist.append(ns + '\n')
        # Parenthesised single-argument print behaves the same on
        # Python 2 and Python 3.
        print(ns)