Ejemplo n.º 1
0
def predict(essay):
    """Grade an essay with the pickled model and return the score times 100.

    The essay is run through ``pr.preproc`` and ``fe.featext``; the six
    resulting features are packed into a ``(1, 6)`` matrix and handed to
    ``model.predict``.

    Args:
        essay: Essay to grade; passed unchanged to ``pr.preproc``
            (presumably raw text -- confirm against ``preproc``).

    Returns:
        ``model.predict(...) * 100``, or ``0`` when the essay has no words
        or when more than 30% of its words are counted as spelling errors.
        NOTE(review): the normal path returns whatever ``model.predict``
        yields (likely a one-element array) while the override path
        returns a plain ``0``; callers should be checked before unifying.
    """
    # NOTE(review): `jb` is presumably joblib; unpickling runs arbitrary
    # code, so this file must only ever come from a trusted location.
    with open('gradually/learn/rbfmodel.pkl', 'rb') as f:
        model = jb.load(f)

    # preproc returns {'essay_list', 'essay_set', 'no_punc_list', 'low_list',
    # 'no_stop_list', 'gradr_no_stop_list'}
    prep_res = pr.preproc(essay)

    # featext returns {'lexical_div', 'word_cnt', 'long_word_cnt',
    # 'spell_err_cnt', 'distinct_word_cnt', 'stem_cnt'}
    feat = fe.featext(prep_res)

    # Single-sample feature matrix of shape (1, 6). The column order is
    # presumably the one the model was trained with -- verify before changing.
    feat_mtx = np.array(
        [[
            feat['word_cnt'],
            feat['long_word_cnt'],
            feat['spell_err_cnt'],
            feat['lexical_div'],
            feat['distinct_word_cnt'],
            feat['stem_cnt'],
        ]],
        dtype=float,
    )

    grade = model.predict(feat_mtx)

    # Override the model when the essay is empty or mostly misspelled.
    # Checking the word count first avoids a ZeroDivisionError on essays
    # that contain no words at all.
    word_cnt = feat['word_cnt']
    if word_cnt == 0 or feat['spell_err_cnt'] / word_cnt > 0.3:
        grade = 0

    return grade * 100
Ejemplo n.º 2
0
def check(essay):
    """Spell-check an essay and collect suggestions for rejected words.

    Args:
        essay: Essay to check; passed unchanged to ``pr.preproc``.

    Returns:
        A two-element list ``[err, sug]``: ``err`` holds every entry of the
        preprocessed ``'essay_list'`` that ``d.check`` rejects (in order,
        repeats included), and ``sug[i]`` is ``d.suggest(err[i])``.
    """
    prep = pr.preproc(essay)

    essay_list = prep['essay_list']

    # `d` is presumably a spell-checking dictionary (e.g. an enchant Dict)
    # defined elsewhere in the module -- confirm.
    err = [word for word in essay_list if not d.check(word)]
    sug = [d.suggest(word) for word in err]

    return [err, sug]
Ejemplo n.º 3
0
def check(essay):
    """Return the misspelled words of an essay with their suggestions.

    The essay is preprocessed with ``pr.preproc`` and each entry of the
    resulting ``'essay_list'`` is tested with ``d.check``.

    Args:
        essay: Essay to check; handed as-is to ``pr.preproc``.

    Returns:
        ``[err, sug]`` where ``err`` lists the entries ``d.check`` did not
        accept (original order, duplicates kept) and ``sug`` lists the
        result of ``d.suggest`` for each of them, index-aligned with ``err``.
    """
    prep = pr.preproc(essay)

    essay_list = prep['essay_list']

    # NOTE(review): `d` is assumed to be a module-level spell checker whose
    # check() returns a truthy value for accepted words -- verify.
    err = [word for word in essay_list if not d.check(word)]
    sug = [d.suggest(word) for word in err]

    return [err, sug]
Ejemplo n.º 4
0
# Open the training workbook and take its first sheet.
workbook = xlrd.open_workbook('../data/training_set_rel3.xlsx', on_demand=True)
sheet = workbook.sheet_by_index(0)

# Training matrix: one row per essay, six feature columns followed by the label.
data_num = 12978
feat_num = 6 + 1  # 6 features one label
feat_mtx = np.zeros((data_num, feat_num), dtype=np.double)

# Keys of the featext result, listed in the column order they are stored in.
feat_keys = (
    'word_cnt',
    'long_word_cnt',
    'spell_err_cnt',
    'lexical_div',
    'distinct_word_cnt',
    'stem_cnt',
)

for i in range(data_num):
    # Sheet row 0 is skipped (presumably a header row), so sample i is read
    # from sheet row i + 1: essay from column 2, label from column 6.
    row = i + 1
    essay = sheet.cell(row, 2).value
    label = sheet.cell(row, 6).value

    # Preprocess the essay, then extract its features.
    essay = pr.preproc(essay)
    feat = fe.featext(essay)

    # Features fill columns 0..5; the label goes in the last column.
    for col, key in enumerate(feat_keys):
        feat_mtx[i, col] = feat[key]
    feat_mtx[i, feat_num - 1] = label

    # Progress output: index of the sample just processed.
    print(i)