Example 1
def dataframe_to_latex_table(df,
                             columns_to_compare=[1, 2],
                             columns_to_show=[0, 1, 2]):

    for row_index, row in df.iterrows():
        row_values = [(df.columns[column_index], row[df.columns[column_index]])
                      for column_index in columns_to_compare]

        best_row_score = max(row_values, key=lambda item: item[1])[1]

        # Highlight every column that matches the row's best score; the raw string
        # keeps the backslash of \cellcolor intact for the LaTeX output.
        for res in row_values:
            if res[1] == best_row_score:
                df.at[row_index, res[0]] = r"\cellcolor{yellow!60}" + str(
                    round(float(res[1]), 6))

    headers = [df.columns[column_index] for column_index in columns_to_show]
    not_show = utils.difference(df.columns, headers)
    df = df.drop(columns=not_show)

    latex_table = tabulate(df,
                           headers=headers,
                           tablefmt='latex_raw',
                           showindex=False)

    fh.write_file('local_results_evaluation', 'comp_all_in_one_latex.txt',
                  latex_table)

    return latex_table
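The helper above depends on the project-local fh and utils modules and on tabulate, so it cannot run in isolation. Below is a minimal, self-contained sketch of the same row-highlighting idea, assuming only pandas and tabulate are installed; the column names and values are invented for illustration.

import pandas as pd
from tabulate import tabulate

# Invented scores; object dtype so cells can later hold LaTeX-decorated strings.
df = pd.DataFrame({'filename': ['doc_a', 'doc_b'],
                   'ratio_cfg1': [0.912345, 0.873210],
                   'ratio_cfg2': [0.905678, 0.891234]}, dtype=object)

score_columns = ['ratio_cfg1', 'ratio_cfg2']
for row_index, row in df.iterrows():
    best = max(row[column] for column in score_columns)
    for column in score_columns:
        if row[column] == best:
            df.at[row_index, column] = r"\cellcolor{yellow!60}" + str(round(float(best), 6))

print(tabulate(df, headers=df.columns, tablefmt='latex_raw', showindex=False))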
Example 2
def save_correct_split_word_by_newline_results(dirname, filename, result):
    """Guarda los resultados del correct_bigram.
    Espera recibir un objeto con la misma estructura que el devuelto por las funciones de
    procesamiento.
    """
    changes = ''

    first_word = ''
    find_first_word = False
    second_word = ''
    find_second_word = False
    for elem in result['tokens']:
        if elem['isFirstPart']:
            first_word = elem['value']
            find_first_word = True
        if elem['isSecondPart']:
            second_word = elem['value']
            find_second_word = True
            correction_word = elem['correction']
        if find_first_word and find_second_word:
            changes += f'first_word -> {first_word}\n'
            changes += f'second_word -> {second_word}\n'
            changes += f'correction -> {correction_word}\n'
            changes += '\n'
            find_first_word = False
            find_second_word = False
            first_word = ''
            second_word = ''

    fh.write_file(dirname, f'correct_split_word_by_newline_{filename}',
                  changes)
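The token structure this saver expects is only implied by the keys it reads (value, isFirstPart, isSecondPart, correction). A hypothetical input consistent with those keys, and the text it would produce:

# Hypothetical tokens for a word split across a line break; the field names come
# from the accesses above, the values are invented.
result = {'tokens': [
    {'value': 'infor-', 'isFirstPart': True, 'isSecondPart': False},
    {'value': 'mation', 'isFirstPart': False, 'isSecondPart': True,
     'correction': 'information'},
]}
# Passed to save_correct_split_word_by_newline_results, this would write:
#   first_word -> infor-
#   second_word -> mation
#   correction -> information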
Example 3
def process_file(filename):
    ocr_output = fh.read_file(dir_ocr_outputs, filename)

    result_after_regex = post.process_with_regex(ocr_output)

    text_to_procces = result_after_regex

    # Use of the process_split_word_by_newline flag
    flag_pswbn = config.process_split_word_by_newline
    if flag_pswbn in ("split", "join"):
        result_after_pfe = pfe.correct_split_word_by_newline(
            result_after_regex, flag_pswbn, lm)
        results_evaluator.save_correct_split_word_by_newline_results(
            dir_step_2, filename, result_after_pfe)
        text_to_procces = text_generator.generate_text_split_words_by_newline(
            result_after_regex, result_after_pfe['tokens'])

    text_for_split_word_process = text_to_procces

    # Use of the correct_split_word flag
    flag_csw = config.correct_split_word
    if flag_csw in ("any_line", "same_line"):
        result_after_pfe_2 = pfe.correct_split_word(
            text_for_split_word_process)
        text_to_procces = text_generator.generate_text_split_words(
            text_for_split_word_process, result_after_pfe_2['tokens'])

    fh.write_file(dir_step_1, f'improved_{filename}', text_to_procces)

    result = post.correct_errors_process(text_to_procces, lm)

    results_evaluator.save_postprocessor_results(dir_step_2, filename, result,
                                                 text_to_procces)
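process_file is driven by two configuration flags; only their names and the values accepted by the membership tests are visible here. A hypothetical stand-in for the project's config module, purely to show which values enable each step:

# Hypothetical config values; the flag names and the accepted values
# ("split"/"join", "any_line"/"same_line") are taken from the code above.
class config:
    process_split_word_by_newline = 'join'  # any other value skips that step
    correct_split_word = 'same_line'        # any other value skips that step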
Example 4
def compare_configurations(file_names=[],
                           output_file_name='comp_all_in_one.csv'):
    df = fh.merge_csvs(dirname='local_results_evaluation',
                       file_names=file_names,
                       merge_on_column='filename',
                       usecols=['filename', 'ratio'],
                       dtype='object')

    fh.write_file('local_results_evaluation', output_file_name,
                  df.to_csv(index=False))
    return df
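fh.merge_csvs is project code, so only its call signature is visible above. A rough pandas sketch of the merge those arguments suggest, not the project's actual implementation:

import functools
import os
import pandas as pd

def merge_csvs_sketch(dirname, file_names, merge_on_column='filename',
                      usecols=('filename', 'ratio'), dtype='object'):
    # Read each CSV with the requested columns, then merge them all on the key
    # column; pandas adds _x/_y suffixes where 'ratio' appears on both sides.
    frames = [pd.read_csv(os.path.join(dirname, name),
                          usecols=list(usecols), dtype=dtype)
              for name in file_names]
    return functools.reduce(
        lambda left, right: left.merge(right, on=merge_on_column), frames)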
Example 5
def save_postprocessor_results(dirname, filename, result, original_text):
    """Guarda los resultados del postprocessor.
    Espera recibir un objeto con la misma estructura que el devuelto por las funciones de
    procesamiento.
    """
    # corrected_text = result["corrected_text"]
    corrected_text = text_generator.generate_text(original_text,
                                                  result['tokens'])
    ngrams_result = format_ngrams(result['tokens'])

    fh.write_file(dirname, filename, corrected_text)
    fh.write_file(dirname, f'ngrams_{filename}', ngrams_result)
Example 6
    def _write(self, prop):
        path = os.path.join(self.tx_dir, f'{prop}.json')
        file.write_file(path, getattr(self, prop), 'json')
Example 7
def write_file(dirname, filename, content):
    fh.write_file(dirname, filename, content)
Example 8
def describe_results(filename, output_filename='comp_all_in_one_metrics.csv'):
    df = fh.read_csv_file('local_results_evaluation', filename)
    # pdb.set_trace()
    fh.write_file('local_results_evaluation', output_filename,
                  df.describe().to_csv())
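df.describe() is the standard pandas summary: for each numeric column it reports count, mean, std, min, the 25%/50%/75% quartiles and max, which to_csv() then flattens to text. A tiny self-contained illustration with invented numbers:

import pandas as pd

# Invented ratios; the printed CSV has one row per describe() statistic.
df = pd.DataFrame({'ratio': [0.91, 0.87, 0.95]})
print(df.describe().to_csv())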
Example 9
    def _save_data(self, name):
        cfg = self.boards[name]['config']
        f_type = cfg.get('file_type', 'json')

        if f_type in ('json', 'yaml'):
            write_file(cfg['path'], self.boards[name]['data'], f_type)
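The shape of self.boards is only implied by the keys _save_data reads. A hypothetical instance consistent with those accesses (the board name, path and data are invented):

# Hypothetical boards mapping; only the nested keys ('config', 'file_type',
# 'path', 'data') are taken from the method above.
boards = {
    'backlog': {
        'config': {'file_type': 'yaml', 'path': 'boards/backlog.yaml'},
        'data': {'tasks': []},
    },
}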