Exemple #1
0
def test_get_last_autotex_compiler():
    """Check the compiler reported for a log containing several 'Running <compiler>' banners.

    The sample log names 'hpdflatex' once and then 'pdflatex' twice; the expected
    result is 'pdflatex', the compiler named in the final banner.
    """
    log_lines = (
        "[verbose]:  ~~~~~~~~~~~ Running hpdflatex for the first time ~~~~~~~~",
        "...",
        "[verbose]:  ~~~~~~~~~~~ Running pdflatex for the first time ~~~~~~~~",
        "...",
        "[verbose]:  ~~~~~~~~~~~ Running pdflatex for the second time ~~~~~~~~",
        "...",
    )
    log_text = "\n".join(log_lines)

    assert get_last_autotex_compiler(log_text) == "pdflatex"
Exemple #2
0
def get_last_colorized_entity(
        arxiv_id: ArxivId,
        compilation_path: RelativePath) -> Optional[EntityId]:
    """
    Find the last entity that was colorized before AutoTeX failed on a colorized paper.

    The compiler name is extracted from the AutoTeX log ('auto_gen_ps.log') of the
    original compilation of the paper (under the 'compiled-normalized-sources' data
    directory). That name, together with the AutoTeX log found in 'compilation_path',
    is passed to 'get_last_colorized_entity_id' to identify the entity.

    Args:
        arxiv_id: ID of the paper whose colorized sources were compiled.
        compilation_path: directory in which the colorized copy was compiled; it is
            expected to contain an 'auto_gen_ps.log' file.

    Returns:
        The ID of the last colorized entity, or None when either log file is missing,
        the compiler name cannot be found in the original log, or the entity cannot be
        determined from the new log. A warning is logged in each of those cases.
    """

    original_compilation_path = directories.arxiv_subdir(
        "compiled-normalized-sources", arxiv_id)
    original_autogen_log_path = os.path.join(original_compilation_path,
                                             "auto_gen_ps.log")
    new_autogen_log_path = os.path.join(compilation_path, "auto_gen_ps.log")

    # Collect every missing-log problem so that none is hidden when both logs are absent.
    problems = []
    if not os.path.exists(original_autogen_log_path):
        problems.append(
            f"Could not find auto_gen_ps.log output from AutoTeX at {original_autogen_log_path}. "
            f"Has the original TeX for paper {arxiv_id} been compiled?")
    if not os.path.exists(new_autogen_log_path):
        problems.append(
            f"Could not find auto_gen_ps.log output from AutoTeX at {new_autogen_log_path}. "
            f"There may have been an error running AutoTeX on a colorized copy of paper {arxiv_id}."
        )

    if problems:
        # The details are passed as a lazy '%s' argument so that any '%' characters in
        # the paths are never interpreted as format directives.
        logging.warning(
            "%s It will not be possible to determine what compiler succeeded at compiling the "
            "original paper, and therefore to determine which entities may have been "
            "responsible for compilation failure. Entity batching may be less efficient.",
            " ".join(problems),
        )
        return None

    # If TeX can process data that is not utf-8, then that non-utf-8 data can also be printed
    # out to the AutoTeX log. Therefore, when reading the AutoTeX log, the files need to be opened
    # in a way that is permissive of non-utf-8 data.
    with open(original_autogen_log_path, errors="surrogateescape") as file_:
        original_autogen_log = file_.read()
    with open(new_autogen_log_path, errors="surrogateescape") as file_:
        new_autogen_log = file_.read()

    # Get the name of the TeX compiler that successfully compiled the original TeX.
    compiler_name = get_last_autotex_compiler(original_autogen_log)
    if compiler_name is None:
        logging.warning(
            "Could not find the name of the TeX compiler that compiled the original TeX by "
            "scanning the logs at %s. It will not be possible to determine what was the last "
            "entity colorized before the compilation failure. Entity batching may be less efficient.",
            original_autogen_log_path,
        )
        return None

    # Get the ID of the last entity that was colorized before compilation failure
    last_colorized_entity_id = get_last_colorized_entity_id(
        new_autogen_log, compiler_name)
    if last_colorized_entity_id is not None:
        # The failure happened while compiling the colorized copy, so the directory
        # reported here is the colorized compilation directory.
        logging.debug(
            "Entity '%s' was the last entity colorized before compilation failure in "
            "directory %s. The colorization of this entity may be responsible for the "
            "compilation error.",
            last_colorized_entity_id,
            compilation_path,
        )
    else:
        logging.warning(
            "Unable to determine what was the last entity colorized before compilation failure "
            "in source directory %s from log %s for compiler '%s'. Entity batching may be less efficient.",
            compilation_path,
            new_autogen_log_path,
            compiler_name,
        )
    return last_colorized_entity_id