Example 1
0
def get_tf_idf_model(model_path=None, model_name=None):
    '''
    Load a pickled tf-idf feature-extraction model from disk.

    :param model_path: str or None, directory containing the model file
    :param model_name: str or None, file name of the model (used only when
                       model_path is given)
    :return: the unpickled model object (a (vectorizer, transformer) pair,
             as saved by train_tf_idf_model)
    '''
    # Fall back to the default location produced by train_tf_idf_model
    # when no explicit path is supplied.
    if model_path is not None:
        file_path = model_path + model_name
    else:
        file_path = main_path() + '/tfidf_generator.pkl'
    # BUG FIX: the original fallback branch called pickle.dump(...) on a
    # write-mode handle, which cannot load anything (and truncated the
    # saved model file). Both branches must read with pickle.load.
    # A context manager also closes the handle the original leaked.
    with open(file_path, 'rb') as fin:
        return pickle.load(fin)
def get_vovab(question_id):
    """
    Load the pickled vocabulary for the given question.

    :param question_id: question identifier; used as a sub-directory name
                        under <main_path>/vocab/
    :return: the unpickled vocabulary object
    """
    vocab_file = main_path() + "/vocab/" + str(question_id) + "/" + 'vocab.data'
    with open(vocab_file, 'rb') as handle:
        return pickle.load(handle)
Example 3
0
 def __init__(self):
     """
     Build the question-id -> model lookup table.

     Scans the model directory (a sibling 'model/' folder of main_path())
     and loads every model file found into self.model, a MyDict keyed by
     question id and sub-key.
     """
     self.model = MyDict()  # key is question id, value is model
     self.model_path = main_path().rsplit('/', 1)[0] + '/model/'
     file_names = self.eachFile(self.model_path)
     print('Model loading ...')
     for file_name in file_names:
         # A file name looks like '2017_01_qm_11_1.svm.cf.m'.
         print('  ', file_name)
         parts = file_name.split('.')
         loaded = self.load_model(self.model_path + file_name, parts[-1])
         self.model.add(key=parts[0],
                        sub_key=parts[1],
                        value=[loaded, parts[2], parts[-1]])
     print('Model loading completed ...')
Example 4
0
def train_tf_idf_model(train_corpus, save_path=None, save_name=None):
    '''
    Train a tf-idf feature extractor and pickle it to disk.

    :param train_corpus: list type, each item is a sentence with cut words
                         (space-separated tokens)
    :param save_path: str or None, directory to save the model in
    :param save_name: str or None, base file name ('.pkl' is appended)
    '''
    vectorizer = CountVectorizer(ngram_range=(1, 1), min_df=0.008)
    transformer = TfidfTransformer()
    term_counts = vectorizer.fit_transform(train_corpus)
    # The transformed matrix is discarded; the call is kept because it
    # fits the transformer on the corpus' term counts.
    transformer.fit_transform(term_counts)
    # Default save location mirrors get_tf_idf_model's fallback path.
    if save_path is not None:
        target = save_path + save_name + '.pkl'
    else:
        target = main_path() + '/tfidf_generator.pkl'
    # Use a context manager so the file handle is always closed
    # (the original opened the file inline in pickle.dump and leaked it).
    with open(target, 'wb') as fout:
        pickle.dump((vectorizer, transformer), fout)
def get_sess(question_id):
    """
    Load the frozen TensorFlow graph for *question_id* and return a session.

    :param question_id: str, used as the directory name under <main_path>/models/
    :return: a tf.Session with the question's frozen graph imported and
             global variables initialized
    """
    tf.reset_default_graph()
    sess = tf.Session()
    # Path of the frozen-graph (.pb) model file.
    # Old hard-coded path, kept for reference:
    #     pb_file_path = "G:/工作任务2019/合作相关/合作成果/IntelligentJudgmentServer/src/py_server/feature_generater/models/" + question_id + "/model.pb"
    pb_file_path = main_path() + "/models/" + question_id + "/model.pb"
    print('我被调用了,我是获取session')
    with gfile.FastGFile(pb_file_path, 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
        sess.graph.as_default()
        tf.import_graph_def(graph_def, name='')  # import the computation graph

    # An explicit initialization pass is required before the session is usable.
    sess.run(tf.global_variables_initializer())

    return sess
def getWord_Tokens(sentence):
    """
    Tokenize *sentence* with jieba after domain-specific normalization.

    :param sentence: str, the sentence to segment
    :return: list of str, the segmented tokens of the normalized sentence
    """
    # Load the custom dictionary so domain terms are segmented as units.
    jieba.load_userdict(main_path() + '/data/jiebaDict.txt')
    print("我执行了这个加载")
    answer = sentence
    answer = answer.replace('cm', '')
    answer = answer.replace('米', '')
    answer = answer.replace('x', '')
    answer = answer.replace('≤', '-')
    answer = answer.replace('!', '-')
    answer = answer.replace('`', '-')
    # NOTE(review): the character class [≤|<] also matches a literal '|';
    # presumably only '≤' and '<' were intended — confirm before changing.
    answer = re.sub('[≤|<].*?[≤|<]', '-', answer)
    answer = answer.replace('155~160~165', '155-165')
    answer = answer.replace('150-155-160', '150-160')
    answer = answer.replace('~', '到')
    answer = answer.replace('-', '到')
    # BUG FIX: the original re-assigned seg = jieba.cut(sentence) right
    # after this line, discarding every normalization step above.
    # Segment the normalized text instead.
    seg = jieba.cut(answer)

    return " ".join(seg).split(" ")
    if IllegalTorF or RepeatTorF or SecquenceTorF:
        return 0
    #预测分值

    score = get_score(sess, vocab, question_id, str(question_content))

    return score


if __name__ == '__main__':
    # Manual smoke test: load the vocabulary and TF session for one
    # question id (the scoring calls below are left commented out).
    print("--------Main----------")
    # print (get_score(get_model("math_002",sess),"math_002","选择155-165,因为身高集中。"))
    # print(get_model("math_002","选择155-165,因为身高集中。"))
    # print(get_model("math_002","选择155-165,因为身高集中。"))
    # print(get_model("math_002","选择155-165,因为身高集中。"))
    print(main_path())
    question_id = "math_002"
    student_answer = "选择,因为身高集中。"
    # Load the vocabulary.
    vocab = get_vovab(question_id)
    # Load the model.
    sess = get_sess(question_id)
    #     # Predict the score.
    #     score = get_score(sess,vocab,question_id,student_answer)
    #     print (score)
    #     score = get_score(sess, vocab, question_id, student_answer)
    #     print (score)
    #     # Done.
    #     sess.close()
    #
    #     question_id = "math_004"