def dataframe_to_latex_table(df, columns_to_compare=(1, 2), columns_to_show=(0, 1, 2)):
    """Highlight the best score in each row and render ``df`` as a LaTeX table.

    For every row, the maximum value among ``columns_to_compare`` is wrapped in a
    ``\\cellcolor{yellow!60}`` LaTeX command (the dataframe is modified in place).
    Only ``columns_to_show`` are kept; the table is written to
    ``local_results_evaluation/comp_all_in_one_latex.txt``.

    Args:
        df: pandas DataFrame whose compared columns hold numeric scores.
        columns_to_compare: positional indices of the columns whose values are
            compared to find the per-row maximum. (Immutable default — avoids
            the shared mutable default-argument pitfall.)
        columns_to_show: positional indices of the columns kept in the output.

    Returns:
        The LaTeX table as a string.
    """
    for row_index, row in df.iterrows():
        row_values = [(df.columns[column_index], row[df.columns[column_index]])
                      for column_index in columns_to_compare]
        best_row_score = max(row_values, key=lambda item: item[1])[1]
        for res in row_values:
            if res[1] == best_row_score:
                # Raw string: "\c" is an invalid escape sequence in a normal
                # string literal (SyntaxWarning on modern Python).
                df.at[row_index, res[0]] = (r"\cellcolor{yellow!60}"
                                            + str(round(float(res[1]), 6)))
    headers = [df.columns[column_index] for column_index in columns_to_show]
    not_show = utils.difference(df.columns, headers)
    df = df.drop(columns=not_show)
    latex_table = tabulate(df, headers=headers, tablefmt='latex_raw',
                           showindex=False)
    fh.write_file('local_results_evaluation', 'comp_all_in_one_latex.txt',
                  latex_table)
    return latex_table
def save_correct_split_word_by_newline_results(dirname, filename, result):
    """Save the results of correct_split_word_by_newline.

    Expects an object with the same structure as the one returned by the
    processing functions: ``result['tokens']`` is an iterable of dicts with
    ``isFirstPart``, ``isSecondPart``, ``value`` and ``correction`` keys.

    For each (first part, second part) pair found, the two fragments and the
    proposed correction are written to
    ``<dirname>/correct_split_word_by_newline_<filename>``.
    """
    # Collect output lines in a list and join once (linear instead of the
    # quadratic repeated string concatenation).
    lines = []
    first_word = ''
    find_first_word = False
    second_word = ''
    find_second_word = False
    for elem in result['tokens']:
        if elem['isFirstPart']:
            first_word = elem['value']
            find_first_word = True
        if elem['isSecondPart']:
            second_word = elem['value']
            find_second_word = True
        correction_word = elem['correction']
        if find_first_word and find_second_word:
            lines.append(f'first_word -> {first_word}\n')
            lines.append(f'second_word -> {second_word}\n')
            lines.append(f'correction -> {correction_word}\n')
            lines.append('\n')
            find_first_word = False
            find_second_word = False
            first_word = ''
            second_word = ''
    changes = ''.join(lines)
    # Interpolate the actual filename: the parameter was previously unused and
    # the literal text "(unknown)" was written into the output file name.
    fh.write_file(dirname, f'correct_split_word_by_newline_{filename}', changes)
def process_file(filename):
    """Run the full OCR post-processing pipeline on a single file.

    Reads the raw OCR output, applies regex-based cleanup, optionally corrects
    words split by newlines and split words (driven by configuration flags),
    writes the intermediate improved text to ``dir_step_1``, then runs the
    error-correction step and saves its results to ``dir_step_2``.
    """
    ocr_output = fh.read_file(dir_ocr_outputs, filename)
    result_after_regex = post.process_with_regex(ocr_output)
    text_to_procces = result_after_regex

    # Flag: process_split_word_by_newline
    flag_pswbn = config.process_split_word_by_newline
    if flag_pswbn in ("split", "join"):
        result_after_pfe = pfe.correct_split_word_by_newline(
            result_after_regex, flag_pswbn, lm)
        results_evaluator.save_correct_split_word_by_newline_results(
            dir_step_2, filename, result_after_pfe)
        text_to_procces = text_generator.generate_text_split_words_by_newline(
            result_after_regex, result_after_pfe['tokens'])

    text_for_split_word_process = text_to_procces

    # Flag: correct_split_word
    flag_csw = config.correct_split_word
    if flag_csw in ("any_line", "same_line"):
        result_after_pfe_2 = pfe.correct_split_word(
            text_for_split_word_process)
        text_to_procces = text_generator.generate_text_split_words(
            text_for_split_word_process, result_after_pfe_2['tokens'])

    # Interpolate the actual filename: previously the literal text "(unknown)"
    # was written instead of the processed file's name, so every file
    # overwrote the same output.
    fh.write_file(dir_step_1, f'improved_{filename}', text_to_procces)
    result = post.correct_errors_process(text_to_procces, lm)
    results_evaluator.save_postprocessor_results(dir_step_2, filename,
                                                 result, text_to_procces)
def compare_configurations(file_names=None, output_file_name='comp_all_in_one.csv'):
    """Merge per-configuration result CSVs into one comparison CSV.

    Args:
        file_names: names of the CSV files (inside ``local_results_evaluation``)
            to merge on their ``filename`` column; only the ``filename`` and
            ``ratio`` columns are read. Defaults to an empty list.
        output_file_name: name of the merged CSV written back to
            ``local_results_evaluation``.

    Returns:
        The merged pandas DataFrame.
    """
    # Avoid the mutable default-argument pitfall (a shared [] default).
    if file_names is None:
        file_names = []
    df = fh.merge_csvs(dirname='local_results_evaluation',
                       file_names=file_names,
                       merge_on_column='filename',
                       usecols=['filename', 'ratio'],
                       dtype='object')
    fh.write_file('local_results_evaluation', output_file_name,
                  df.to_csv(index=False))
    return df
def save_postprocessor_results(dirname, filename, result, original_text):
    """Save the results of the postprocessor.

    Expects an object with the same structure as the one returned by the
    processing functions. Writes the corrected text to ``<dirname>/<filename>``
    and the formatted n-gram report to ``<dirname>/ngrams_<filename>``.
    """
    corrected_text = text_generator.generate_text(original_text,
                                                  result['tokens'])
    ngrams_result = format_ngrams(result['tokens'])
    fh.write_file(dirname, filename, corrected_text)
    # Interpolate the actual filename: previously the literal text "(unknown)"
    # was written into the n-grams output file name.
    fh.write_file(dirname, f'ngrams_{filename}', ngrams_result)
def _write(self, prop):
    """Persist the attribute named *prop* as ``<tx_dir>/<prop>.json``."""
    target = os.path.join(self.tx_dir, prop + '.json')
    file.write_file(target, getattr(self, prop), 'json')
def write_file(dirname, filename, content):
    """Thin wrapper: write *content* to ``<dirname>/<filename>`` via ``fh.write_file``."""
    fh.write_file(dirname, filename, content)
def describe_results(filename, output_filename='comp_all_in_one_metrics.csv'):
    """Compute summary statistics for a results CSV and save them.

    Reads ``local_results_evaluation/<filename>``, runs pandas
    ``DataFrame.describe()`` on it, and writes the summary CSV to
    ``local_results_evaluation/<output_filename>``.
    """
    # Removed leftover commented-out debugger call (pdb.set_trace()).
    df = fh.read_csv_file('local_results_evaluation', filename)
    fh.write_file('local_results_evaluation', output_filename,
                  df.describe().to_csv())
def _save_data(self, name):
    """Write the named board's data to its configured path, if the
    configured file type is one of the supported serializations."""
    board = self.boards[name]
    file_type = board['config'].get('file_type', 'json')
    if file_type == 'json' or file_type == 'yaml':
        write_file(board['config']['path'], board['data'], file_type)