def predict(essay):
    """Grade a single essay with the model stored in ``rbfmodel.pkl``.

    The essay is run through ``pr.preproc`` and ``fe.featext``; six of the
    resulting features are packed into a ``(1, 6)`` row in a fixed column
    order and handed to ``model.predict``.

    Args:
        essay: Raw essay, passed unchanged to ``pr.preproc``.

    Returns:
        The value returned by ``model.predict`` multiplied by 100, or the
        integer ``0`` when more than 30% of the words are counted as
        spelling errors.
    """
    # NOTE: the path is relative, so it is resolved against the current
    # working directory of the process, not against this file.
    with open('gradually/learn/rbfmodel.pkl', 'rb') as f:
        model = jb.load(f)

    # Per the author's notes, preproc returns a dict with the keys
    # 'essay_list', 'essay_set', 'no_punc_list', 'low_list', 'no_stop_list'
    # and 'gradr_no_stop_list'.
    prep_res = pr.preproc(essay)
    # Per the author's notes, featext returns a dict with the keys
    # 'lexical_div', 'word_cnt', 'long_word_cnt', 'spell_err_cnt',
    # 'distinct_word_cnt' and 'stem_cnt'.
    feat = fe.featext(prep_res)

    # Column order of the feature row; index in this tuple == column index.
    feature_order = (
        'word_cnt',
        'long_word_cnt',
        'spell_err_cnt',
        'lexical_div',
        'distinct_word_cnt',
        'stem_cnt',
    )
    # The array is created with its final (1, 6) shape, so no reshape is
    # needed (ndarray.reshape returns a new array and never works in place).
    feat_mtx = np.zeros((1, len(feature_order)))
    for col, key in enumerate(feature_order):
        feat_mtx[0, col] = feat[key]

    grade = model.predict(feat_mtx)

    # Override the model when the essay is mostly misspelled. The truthiness
    # guard skips the ratio for a zero word count instead of dividing by zero.
    word_cnt = feat['word_cnt']
    if word_cnt and feat['spell_err_cnt'] / word_cnt > 0.3:
        grade = 0
    return grade * 100
def check(essay):
    """Find the words of an essay that fail the spell checker.

    Args:
        essay: Raw essay, passed unchanged to ``pr.preproc``.

    Returns:
        A two-element list ``[err, sug]``. ``err`` holds every entry of the
        preprocessed ``'essay_list'`` that ``d.check`` rejects, in order and
        with repeats kept; ``sug`` holds ``d.suggest(word)`` for each of
        those entries at the matching index.
    """
    essay_list = pr.preproc(essay)['essay_list']
    # Test the checker's result for truthiness rather than comparing it
    # with ``== False``.
    err = [word for word in essay_list if not d.check(word)]
    sug = [d.suggest(word) for word in err]
    return [err, sug]
def check(essay):
    """Collect the rejected words of an essay together with suggestions.

    Args:
        essay: Raw essay, passed unchanged to ``pr.preproc``.

    Returns:
        ``[misspelled, suggestions]`` as a two-element list: the entries of
        the preprocessed ``'essay_list'`` for which ``d.check`` is falsy
        (original order, repeats kept) and, index-aligned with them, the
        result of ``d.suggest`` for each entry.
    """
    misspelled = []
    suggestions = []
    for word in pr.preproc(essay)['essay_list']:
        # Truthiness test instead of ``== False``; accepted words are skipped.
        if d.check(word):
            continue
        misspelled.append(word)
        suggestions.append(d.suggest(word))
    return [misspelled, suggestions]
# Open the spreadsheet with the training essays and take its first sheet.
workbook = xlrd.open_workbook('../data/training_set_rel3.xlsx', on_demand=True)
sheet = workbook.sheet_by_index(0)

# Training matrix: one row per essay, six feature columns followed by one
# label column.
data_num = 12978
feat_num = 6 + 1
feat_mtx = np.zeros((data_num, feat_num), dtype=np.double)

for i in range(data_num):
    # Sheet row 0 is skipped (presumably a header row - confirm against the
    # file). Column 2 is read as the essay and column 6 as its label.
    essay = sheet.cell(i + 1, 2).value
    label = sheet.cell(i + 1, 6).value

    # Preprocess, then extract features. Per the author's notes the feature
    # dict has the keys 'lexical_div', 'word_cnt', 'long_word_cnt',
    # 'spell_err_cnt', 'distinct_word_cnt' and 'stem_cnt'.
    essay = pr.preproc(essay)
    feat = fe.featext(essay)

    # Fill columns 0-5 with the features in their fixed order, then the label.
    feat_mtx[i, :feat_num - 1] = [
        feat['word_cnt'],
        feat['long_word_cnt'],
        feat['spell_err_cnt'],
        feat['lexical_div'],
        feat['distinct_word_cnt'],
        feat['stem_cnt'],
    ]
    feat_mtx[i, feat_num - 1] = label

    # Progress indicator: index of the row just filled.
    print(i)