def get_tf_idf_model(model_path=None, model_name=None):
    '''
    Load a pickled tf-idf feature extractor (vectorizer, transformer pair).

    :param model_path: str or None; directory containing the model file.
        When None, the default model next to main_path() is loaded.
    :param model_name: str or None; file name of the model. Used only when
        model_path is given.
    :return: the unpickled model object.
    '''
    # NOTE(review): pickle.load is unsafe on untrusted files; paths here are
    # expected to be local, trusted model files.
    if model_path is not None:
        path = model_path + model_name
    else:
        # BUG FIX: the original called pickle.dump(open(..., 'wb')) here,
        # which truncated the default model file instead of loading it
        # (and raised TypeError since pickle.dump needs an object argument).
        path = main_path() + '/tfidf_generator.pkl'
    # 'with' guarantees the file handle is closed (the original leaked it)
    with open(path, 'rb') as fin:
        model = pickle.load(fin)
    return model
def get_vovab(question_id):
    """
    Load the pickled vocabulary for one question.

    :param question_id: question identifier; names the vocab sub-directory.
    :return: the unpickled vocab object.
    """
    vocab_file = main_path() + "/vocab/" + str(question_id) + "/" + 'vocab.data'
    with open(vocab_file, 'rb') as fin:
        return pickle.load(fin)
def __init__(self):
    """
    Scan the model directory and load every model file found there.

    self.model maps question id -> sub key -> [loaded model, tag, extension],
    where the pieces come from splitting the file name on '.'.
    """
    self.model = MyDict()  # key is question id, value is model
    # model directory lives next to the main path: <parent of main_path>/model/
    self.model_path = main_path().rsplit('/', 1)[0] + '/model/'
    print('Model loading ...')
    for file_name in self.eachFile(self.model_path):
        # file_name looks like '2017_01_qm_11_1.svm.cf.m'
        print(' ', file_name)
        parts = file_name.split('.')
        loaded = self.load_model(self.model_path + file_name, parts[-1])
        self.model.add(key=parts[0], sub_key=parts[1],
                       value=[loaded, parts[2], parts[-1]])
    print('Model loading completed ...')
def train_tf_idf_model(train_corpus, save_path=None, save_name=None):
    '''
    Fit a tf-idf feature extractor and pickle the fitted pair to disk.

    :param train_corpus: list of str; each item is a sentence of
        space-separated (pre-cut) words.
    :param save_path: str or None; directory to save the model into.
        When None, the model is written next to main_path().
    :param save_name: str or None; base file name (without '.pkl'),
        used only when save_path is given.
    '''
    vectorizer = CountVectorizer(ngram_range=(1, 1), min_df=0.008)
    transformer = TfidfTransformer()
    term_counts = vectorizer.fit_transform(train_corpus)
    # fit the idf weights; the resulting tf-idf matrix itself is not saved
    transformer.fit_transform(term_counts)
    if save_path is not None:
        target = save_path + save_name + '.pkl'
    else:
        target = main_path() + '/tfidf_generator.pkl'
    # 'with' ensures the file handle is closed (the original leaked it)
    with open(target, 'wb') as fout:
        pickle.dump((vectorizer, transformer), fout)
def get_sess(question_id):
    """
    Build a TensorFlow session with the frozen graph for question_id loaded.

    :param question_id: str; id of the question whose model.pb is loaded.
    :return: a tf.Session whose graph contains the imported model.
    """
    tf.reset_default_graph()
    sess = tf.Session()
    # location of the frozen-graph file for this question
    pb_file_path = main_path() + "/models/" + question_id + "/model.pb"
    print('我被调用了,我是获取session')
    with gfile.FastGFile(pb_file_path, 'rb') as fin:
        gdef = tf.GraphDef()
        gdef.ParseFromString(fin.read())
        sess.graph.as_default()
        # import the computation graph under the root name scope
        tf.import_graph_def(gdef, name='')
    # initialization pass before the session is handed to callers
    sess.run(tf.global_variables_initializer())
    return sess
def getWord_Tokens(sentence):
    """
    Normalize a sentence and segment it into word tokens with jieba.

    :param sentence: str; the sentence to normalize and segment.
    :return: list of str tokens from cutting the CLEANED sentence.
    """
    # NOTE(review): loading the user dict on every call is wasteful but kept
    # to preserve the original call-time side effect.
    jieba.load_userdict(main_path() + '/data/jiebaDict.txt')
    print("我执行了这个加载")
    answer = sentence
    answer = answer.replace('cm', '')
    answer = answer.replace('米', '')
    answer = answer.replace('x', '')
    answer = answer.replace('≤', '-')
    answer = answer.replace('!', '-')
    answer = answer.replace('`', '-')
    # NOTE(review): '[≤|<]' also matches a literal '|' (the '|' is not an
    # alternation inside a character class) — kept as-is; confirm intent.
    answer = re.sub('[≤|<].*?[≤|<]', '-', answer)
    answer = answer.replace('155~160~165', '155-165')
    answer = answer.replace('150-155-160', '150-160')
    answer = answer.replace('~', '到')
    answer = answer.replace('-', '到')
    # BUG FIX: the original re-cut the raw `sentence` right after cutting
    # `answer`, which silently discarded every normalization step above.
    seg = jieba.cut(answer)
    return " ".join(seg).split(" ")
# Tail of a scoring function whose `def` lies above this chunk; it rejects
# answers flagged illegal / repeated / wrong-sequence, otherwise scores them.
    if IllegalTorF or RepeatTorF or SecquenceTorF:
        return 0
    # predict the score
    score = get_score(sess, vocab, question_id, str(question_content))
    return score


if __name__ == '__main__':
    # Smoke-test driver: load the vocab and model session for one question.
    print("--------Main----------")
    # print (get_score(get_model("math_002",sess),"math_002","选择155-165,因为身高集中。"))
    # print(get_model("math_002","选择155-165,因为身高集中。"))
    # print(get_model("math_002","选择155-165,因为身高集中。"))
    # print(get_model("math_002","选择155-165,因为身高集中。"))
    print(main_path())
    question_id = "math_002"
    student_answer = "选择,因为身高集中。"
    # load the vocabulary
    vocab = get_vovab(question_id)
    # load the model session
    sess = get_sess(question_id)
    # # predict the score
    # score = get_score(sess,vocab,question_id,student_answer)
    # print (score)
    # score = get_score(sess, vocab, question_id, student_answer)
    # print (score)
    # # done
    # sess.close()
    # # question_id = "math_004"