def test_get_latex_nodes(self):
    """Check get_latex_nodes() against a fixed LaTeX sample.

    Three scenarios are asserted, each comparing the full
    ``(nodelist, pos, len)`` tuple returned by the walker:

    * parsing from the start of the ``Also: {...}`` sentence to the end;
    * parsing from just inside the opening brace with
      ``stop_upon_closing_brace='}'``;
    * parsing a single line with a custom ``macro_dict`` entry for ``cite``.
    """
    # NOTE(review): the line breaks in this fixture matter. `lineindeed`
    # below is cut at the first '\n' after 'Indeed thanks to', and the first
    # assertion expects exactly one trailing '\n' after the last sentence.
    latextext = r'''Text and \`accent and \textbf{bold text} and $\vec b$ more stuff for Fran\c cois
\begin{enumerate}[(i)]
\item Hi there! % here goes a comment
\item[a] Hello! @@@
\end{enumerate}
Indeed thanks to \cite[Lemma 3]{Author}, we know that...
Also: {\itshape some italic text}.
'''
    lw = LatexWalker(latextext, tolerant_parsing=False)

    # Scenario 1: start at 'Also: {' and read to the end of the input.
    p = latextext.find('Also: {')
    self.assertEqual(
        lw.get_latex_nodes(pos=p),
        (
            [
                LatexCharsNode('Also: '),
                LatexGroupNode([
                    LatexMacroNode('itshape', None, [], macro_post_space=' '),
                    LatexCharsNode('some italic text'),
                ]),
                LatexCharsNode('.'),
            ],
            p,
            len(latextext) - p - 1,  # trailing '\n' is not included
        ),
    )

    # Scenario 2: start right after the open brace and stop at its match.
    p = latextext.find('Also: {') + len('Also: {')
    self.assertEqual(
        lw.get_latex_nodes(pos=p, stop_upon_closing_brace='}'),
        (
            [
                LatexMacroNode('itshape', None, [], macro_post_space=' '),
                LatexCharsNode('some italic text'),
            ],
            p,
            # Raw string: '\i' is not a valid escape sequence in a plain
            # literal. The length is the same either way.
            len(r'\itshape some italic text}'),
        ),
    )

    # Scenario 3: test our own macro lists etc.
    pindeed = latextext.find('Indeed thanks to')
    lineindeed = latextext[pindeed:latextext.find('\n', pindeed)]
    lw2 = LatexWalker(
        lineindeed,
        tolerant_parsing=False,
        macro_dict={'cite': MacrosDef('cite', False, 4)},
    )
    # With this custom definition the expected result has \cite consuming
    # the next four characters ('[', 'L', 'e', 'm') as its arguments.
    self.assertEqual(
        lw2.get_latex_nodes(pos=0),
        (
            [
                LatexCharsNode('Indeed thanks to '),
                LatexMacroNode('cite', None, [
                    LatexCharsNode('['),
                    LatexCharsNode('L'),
                    LatexCharsNode('e'),
                    LatexCharsNode('m'),
                ]),
                LatexCharsNode('ma 3]'),
                LatexGroupNode([LatexCharsNode('Author')]),
                LatexCharsNode(', we know that...'),
            ],
            0,
            len(lineindeed),
        ),
    )
def test_errors(self):
    """Strict parsing must raise on the sample; tolerant parsing must not.

    The sample comes from
    ``get_test_latex_data_with_possible_inconsistencies()``.
    """
    sample = get_test_latex_data_with_possible_inconsistencies()

    # Strict mode: the parse error has to reach the caller.
    strict_walker = LatexWalker(sample, tolerant_parsing=False)
    self.assertRaises(LatexWalkerParseError, strict_walker.get_latex_nodes)

    # Tolerant mode: the same input has to go through without raising.
    tolerant_walker = LatexWalker(sample, tolerant_parsing=True)
    try:
        tolerant_walker.get_latex_nodes()
    except LatexWalkerParseError as err:
        # should not raise this.
        self.fail(
            u"get_latex_nodes() raised LatexWalkerParseError, but it shouldn't have in "
            u"tolerant parsing mode!\n" + unicode(err)
        )
def printLatexNodes(self, text):
    """Parse ``text`` with LatexWalker and print a dump of the result.

    Prints the input text, then the length reported by
    ``get_latex_nodes(pos=0)``, and finally passes the parsed node list to
    ``self._printLatexNodes``.
    """
    print("print " + text)
    walker = LatexWalker(text)
    # The start position returned by the walker is not needed here.
    parsed_nodes, _start, parsed_len = walker.get_latex_nodes(pos=0)
    print("len_ " + str(parsed_len))
    self._printLatexNodes(parsed_nodes)
def findMathNode(self):
    """Parse ``self.equation`` and return ``self._findMathNodeInList`` of its nodes."""
    walker = LatexWalker(self.equation)
    # Only the node list is used; position and length are discarded.
    parsed_nodes, _start, _length = walker.get_latex_nodes(pos=0)
    return self._findMathNodeInList(parsed_nodes)
def traverseLatex(self, text):
    """Parse ``text`` with LatexWalker and pass its node list to ``self._traverseLatex``."""
    walker = LatexWalker(text)
    # Only the node list is used; position and length are discarded.
    parsed_nodes, _start, _length = walker.get_latex_nodes(pos=0)
    self._traverseLatex(parsed_nodes)
def test2(self):
    """Print every child of the first node parsed from a fixed LaTeX integral."""
    walker = LatexWalker(r"""\[\int_{a}^{b} x^2 \,dx \]""")
    top_nodes, _start, _length = walker.get_latex_nodes(pos=0)
    # Walk the children of the first top-level node directly.
    for child in top_nodes[0].nodelist:
        print(child)
def is_symbol(node, LatexMacroNode): # TODO: improve this function return len(node.macroname) == 1 if __name__ == "__main__": # Read latex file with open(input_tex, "r") as fp: text = fp.read() # Parse latex walker = LatexWalker(text) # Parse nodes nodes, pos, length = walker.get_latex_nodes() # Write output to a file with open(output_txt, "w") as fp: # Number of titles num_titles = 0 # If previous node is a citation no_new_paragraph = False # Loop over all nodes for node in nodes: # TODO: Implement more rules and process environments recursively # Ignore comment nodes if isinstance(node, LatexCommentNode):
import sys

import pylatexenc
from pylatexenc.latexwalker import LatexWalker, LatexEnvironmentNode

# The LaTeX file to process is given as the first command-line argument.
fname = sys.argv[1]
with open(fname, "r") as f:
    tex = f.read()
print(fname)

walker = LatexWalker(tex)
nodelist, pos, leen = walker.get_latex_nodes()

# Gather the stripped text of every chars node that is a direct child of a
# top-level environment node; everything else is skipped.
charslist = []
for node in nodelist:
    if not node.isNodeType(pylatexenc.latexwalker.LatexEnvironmentNode):
        continue
    for subnode in node.nodelist:
        if not subnode.isNodeType(pylatexenc.latexwalker.LatexCharsNode):
            continue
        charslist.append(str(subnode.chars).strip())

# Join the pieces with spaces, then flatten remaining newlines to spaces.
fullst = " ".join(charslist)
fullst = fullst.replace("\n", " ")
print(fullst)