def test_get_last_autotex_compiler():
    """Check the compiler reported for a log with several 'Running ...' lines.

    The sample log runs ``hpdflatex`` once and then ``pdflatex`` twice; the
    expected result is the compiler named in the final run, ``pdflatex``.
    """
    log_lines = (
        "[verbose]: ~~~~~~~~~~~ Running hpdflatex for the first time ~~~~~~~~",
        "...",
        "[verbose]: ~~~~~~~~~~~ Running pdflatex for the first time ~~~~~~~~",
        "...",
        "[verbose]: ~~~~~~~~~~~ Running pdflatex for the second time ~~~~~~~~",
        "...",
    )
    sample_log = "\n".join(log_lines)

    assert get_last_autotex_compiler(sample_log) == "pdflatex"
def get_last_colorized_entity(
    arxiv_id: ArxivId, compilation_path: RelativePath
) -> Optional[EntityId]:
    """Find the last entity colorized before a compilation failure.

    Reads the AutoTeX log (``auto_gen_ps.log``) of the original compilation of
    the paper to learn which TeX compiler was used, then scans the AutoTeX log
    in ``compilation_path`` for the last colorized entity seen by that compiler.

    Args:
        arxiv_id: ID of the paper whose original compilation log is consulted.
        compilation_path: Directory holding the ``auto_gen_ps.log`` produced
            when AutoTeX was run on the colorized copy of the paper.

    Returns:
        The entity ID reported by ``get_last_colorized_entity_id``, or ``None``
        if either log is missing, the compiler name cannot be found, or no
        entity could be identified. Every ``None`` case logs a warning.
    """
    original_compilation_path = directories.arxiv_subdir(
        "compiled-normalized-sources", arxiv_id
    )
    original_autogen_log_path = os.path.join(
        original_compilation_path, "auto_gen_ps.log"
    )
    new_autogen_log_path = os.path.join(compilation_path, "auto_gen_ps.log")

    # Collect every missing-log problem so that one does not mask the other.
    problems = []
    if not os.path.exists(original_autogen_log_path):
        problems.append(
            "Could not find auto_gen_ps.log output from AutoTeX at "
            f"{original_autogen_log_path}. "
            f"Has the original TeX for paper {arxiv_id} been compiled?"
        )
    if not os.path.exists(new_autogen_log_path):
        problems.append(
            "Could not find auto_gen_ps.log output from AutoTeX at "
            f"{new_autogen_log_path}. "
            "There may have been an error running AutoTeX on a colorized copy "
            f"of paper {arxiv_id}."
        )
    if problems:
        logging.warning(
            "%s It will not be possible to determine what compiler succeeded at "
            "compiling the original paper, and therefore to determine which "
            "entities may have been responsible for compilation failure. "
            "Entity batching may be less efficient.",
            " ".join(problems),
        )
        return None

    # If TeX can process data that is not utf-8, then that non-utf-8 data can also be
    # printed out to the AutoTeX log. Therefore, when reading the AutoTeX log, the files
    # need to be opened in a way that is permissive of non-utf-8 data.
    with open(original_autogen_log_path, errors="surrogateescape") as file_:
        original_autogen_log = file_.read()
    with open(new_autogen_log_path, errors="surrogateescape") as file_:
        new_autogen_log = file_.read()

    # Get the name of the TeX compiler that successfully compiled the original TeX.
    compiler_name = get_last_autotex_compiler(original_autogen_log)
    if compiler_name is None:
        logging.warning(
            "Could not find the name of the TeX compiler that compiled the original "
            "TeX by scanning the logs at %s. It will not be possible to determine "
            "what was the last entity colorized before the compilation failure. "
            "Entity batching may be less efficient.",
            original_autogen_log_path,
        )
        return None

    # Get the ID of the last entity that was colorized before compilation failure.
    last_colorized_entity_id = get_last_colorized_entity_id(
        new_autogen_log, compiler_name
    )
    if last_colorized_entity_id is not None:
        logging.debug(
            "Entity '%s' was the last entity colorized before compilation failure in "
            "directory %s. The colorization of this entity may be responsible for the "
            "compilation error.",
            last_colorized_entity_id,
            compilation_path,
        )
    else:
        logging.warning(
            "Unable to determine what was the last entity colorized before "
            "compilation failure in source directory %s from log %s for compiler "
            "'%s'. Entity batching may be less efficient.",
            compilation_path,
            new_autogen_log_path,
            compiler_name,
        )

    return last_colorized_entity_id