def get_grammar(path, files_list):
    """
    Tag every ``.txt`` essay named in ``files_list`` and pass the tags to ``save_tags``.

    For each matching file the text is read, tabs are removed, a space is
    inserted after every period and the result is stripped. The essay's
    prompt and grade are taken from the first ``index_frame`` row whose
    ``filename`` equals the file name. An ``Essay`` is then built from the
    text, prompt, grade and ``stop_words``; its words are set and the result
    of ``get_tagged()`` is given to ``save_tags``.

    :param path: prefix joined to each file name by plain string
        concatenation, so it must already end with a path separator
    :param files_list: iterable of file names; names that do not end in
        ".txt" are skipped
    """
    for file_name in pbar(files_list):
        if not file_name.endswith(".txt"):
            continue

        with open(path + file_name) as handle:
            raw_text = handle.read()
        # Remove tabs, put a space after each period, then trim both ends.
        text = raw_text.replace('\t', '').replace('.', '. ').strip()

        # One boolean mask serves both column lookups for this file's row.
        row_mask = index_frame['filename'] == file_name
        prompt = index_frame.loc[row_mask, 'prompt'].iloc[0]
        grade = index_frame.loc[row_mask, 'grade'].iloc[0]

        essay = Essay(text, prompt, grade, stop_words)
        essay.set_words()
        save_tags(essay.get_tagged())
# Load every data row of the workbook's first sheet into Essay objects.
sheet = wb.sheet_by_index(0)
number_of_rows = sheet.nrows
number_of_column = sheet.ncols

essay_set = []
# Iteration starts at row 1, so row 0 is never read
# (presumably a header row — TODO confirm against the spreadsheet).
for row in range(1, number_of_rows):
    values = []
    for col in range(number_of_column):
        value = sheet.cell(row, col).value
        # Values that int() accepts are stored as integer strings. If the
        # cell holds a float, int() truncates it (3.7 would become "3").
        try:
            value = str(int(value))
        except ValueError:
            # Not convertible to int: keep the cell value unchanged.
            pass
        finally:
            # Runs on both paths, so every cell contributes one entry.
            values.append(value)
    # The row's cell values are passed positionally to the Essay constructor.
    essay = Essay.Essay(*values)
    essay_set.append(essay)

data_set_preprocessing(essay_set, stopword)
# NOTE(review): time.clock() was removed in Python 3.8, so this line raises
# AttributeError on newer interpreters. start_time is assigned outside this
# excerpt; switch both together (e.g. to time.perf_counter()).
print("Finish load data and preprocessing in --- %s seconds ---" % (time.clock() - start_time))

# One list per score category, all initially empty.
essay_set_per_category = {
    'score_a': [],
    'score_b': [],
    'score_c': [],
    'score_d': []
}
# Presumably distributes the essays over the lists above according to
# human_rater_score — the branch bodies are not visible here; confirm.
for essay in essay_set:
    if essay.human_rater_score == "A":
pbar = ProgressBar()

# Walk the training directory and compute the feature values of every
# ".txt" essay; any other directory entry is skipped.
for files in pbar(os.listdir(data_path_train)):
    if not files.endswith(".txt"):
        continue

    with open(data_path_train + files) as essay_file:
        raw_text = essay_file.read()
    # Remove tabs, put a space after each period, then trim both ends.
    text = raw_text.replace('\t', '').replace('.', '. ').strip()

    # Prompt and grade come from the first index_frame row for this file;
    # the mask is built once and reused for both columns.
    this_file = index_frame['filename'] == files
    prompt = index_frame.loc[this_file, 'prompt'].iloc[0]
    grade = index_frame.loc[this_file, 'grade'].iloc[0]

    essay = Essay(text, prompt, grade, stop_words)
    essay.set_words()
    # The grammar is assigned directly; get_tagged() is not called here.
    essay.grammar = grammar

    # The seven values below match the argument list of the currently
    # disabled Evaluate(a, b, c1, c2, c3, d1, d2) call. c1 is taken from
    # get_length(); the get_sv_agreement() alternative is disabled.
    c1, a = essay.get_length()
    b = essay.get_spellingmistakes()
    c2 = essay.get_verb_usage()
    c3 = essay.get_sentence_formation()
    d1 = essay.get_coherence()
    d2 = essay.get_topic_relevance()