def _get_symbol_data(arxiv_id: ArxivId, stdout: str) -> List[EquationSymbols]:
    """
    Convert line-delimited JSON output into a list of 'EquationSymbols'.

    'stdout' is stripped of surrounding whitespace and split into lines; every
    line is decoded as one JSON object. The MathML in the 'mathMl' key is
    parsed into symbols only when the 'success' key is exactly True; for any
    other value the record's 'symbols' field is None.
    """

    def _record_from_line(line: str) -> EquationSymbols:
        # Build a single record from one line of JSON.
        parsed = json.loads(line)
        succeeded = parsed["success"]

        # Parse the MathML before building the record, so that a parse failure
        # surfaces before any of the field conversions below are attempted.
        equation_symbols = (
            parse_equation(parsed["mathMl"]) if succeeded is True else None
        )

        return EquationSymbols(
            arxiv_id=arxiv_id,
            success=succeeded,
            equation_index=int(parsed["i"]),
            tex_path=parsed["tex_path"],
            equation=parsed["equation"],
            equation_start=int(parsed["equation_start"]),
            equation_depth=int(parsed["equation_depth"]),
            context_tex=parsed["context_tex"],
            error_message=parsed["errorMessage"],
            symbols=equation_symbols,
        )

    return [_record_from_line(line) for line in stdout.strip().splitlines()]
def test_parse_equation():
    """
    Parse the 'x_sub_t_sub_i.xml' MathML fixture and check the symbol tree.

    Expects five symbols in total: two whose element contains 'msub' and the
    three identifiers 'x', 't' and 'i'. The 'msub' symbol containing 'x' must
    have 'x' and the other 'msub' symbol as its two children, and that other
    'msub' symbol must have 't' and 'i' as its two children.
    """
    fixture_path = get_test_path(os.path.join("mathml", "x_sub_t_sub_i.xml"))
    with open(fixture_path) as mathml_file:
        mathml = mathml_file.read()
        symbols = parse_equation(mathml)

    assert len(symbols) == 5

    def first_matching(predicate):
        # Indexing with [0] makes a missing symbol fail loudly with IndexError.
        return [symbol for symbol in symbols if predicate(symbol)][0]

    def identifier(name):
        # Look up the symbol whose element is exactly the given <mi> identifier.
        markup = "<mi>" + name + "</mi>"
        return first_matching(lambda s: str(s.element) == markup)

    x_sub_t_sub_i = first_matching(
        lambda s: "msub" in str(s.element) and "x" in str(s.element)
    )
    # The remaining 'msub' symbol is whichever one is not the outer one.
    t_sub_i = first_matching(
        lambda s: "msub" in str(s.element) and s is not x_sub_t_sub_i
    )
    x = identifier("x")
    t = identifier("t")
    i = identifier("i")

    outer_children = x_sub_t_sub_i.children
    assert len(outer_children) == 2
    assert x in outer_children
    assert t_sub_i in outer_children

    inner_children = t_sub_i.children
    assert len(inner_children) == 2
    assert t in inner_children
    assert i in inner_children
def _get_symbol_data(arxiv_id: ArxivId, stdout: str) -> Iterator[SymbolData]:
    """
    Lazily yield one 'SymbolData' per line of line-delimited JSON output.

    'stdout' is stripped of surrounding whitespace and split into lines; each
    line is decoded as a JSON object only when the generator reaches it. The
    MathML in the 'mathMl' key is parsed into symbols only when the 'success'
    key is exactly True; for any other value 'symbols' is None.
    """
    for line in stdout.strip().splitlines():
        parsed = json.loads(line)
        succeeded = parsed["success"]

        # Parse the MathML first so a parse failure is raised before any of the
        # integer conversions below run.
        parsed_symbols = (
            parse_equation(parsed["mathMl"]) if succeeded is True else None
        )

        # Fields describing where the equation sits in the TeX source.
        location = dict(
            equation_index=int(parsed["i"]),
            tex_path=parsed["tex_path"],
            equation=parsed["equation"],
            equation_start=int(parsed["equation_start"]),
            equation_depth=int(parsed["equation_depth"]),
            context_tex=parsed["context_tex"],
        )

        yield SymbolData(
            arxiv_id=arxiv_id,
            success=succeeded,
            errorMessage=parsed["errorMessage"],
            symbols=parsed_symbols,
            **location,
        )