Python latex_to_xml Examples

Programming Language: Python

Namespace/Package Name: latex_to_unicode

Method/Function: latex_to_xml

Examples at hotexamples.com: 2

Python latex_to_xml - 2 examples found. These are the top-rated real-world Python examples of latex_to_unicode.latex_to_xml extracted from open source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

def process(oldnode, informat):
    """Clean up the text of a single XML field node.

    Nodes whose tag is in the skip list below are returned untouched.
    For 'author' and 'editor' nodes, only their children are processed
    (recursively). Every other node is, when ``informat`` is "latex",
    first converted with ``latex_to_xml`` and then passed through the
    HTML-unescape, curly-quote and unicode-cleanup text mappings;
    'title' and 'booktitle' nodes are finally handed to ``protect``.

    :param oldnode: element-like node (has ``tag``, ``attrib``, children).
    :param informat: input format name; only "latex" triggers conversion.
    :return: None.
    """
    skipped_tags = (
        'url', 'href', 'mrf', 'doi', 'bibtype', 'bibkey', 'revision',
        'erratum', 'attachment', 'paper', 'presentation', 'dataset',
        'software', 'video',
    )
    person_tags = ('author', 'editor')
    title_tags = ('title', 'booktitle')

    tag = oldnode.tag
    if tag in skipped_tags:
        return

    if tag in person_tags:
        # Name containers carry no text of their own to clean; descend.
        for oldchild in oldnode:
            process(oldchild, informat=informat)
        return

    if informat == "latex":
        if len(oldnode) > 0:
            # Nested markup is flattened by itertext() below, so report it.
            child_tags = ', '.join(child.tag for child in oldnode)
            logging.error("field has child elements {}".format(child_tags))
        flat_text = ''.join(oldnode.itertext())
        converted = latex_to_xml(flat_text, trivial_math=True, fixed_case=True)
        converted.tag = oldnode.tag
        converted.attrib.update(oldnode.attrib)
        # NOTE(review): the calls below keep using oldnode, so replace_node
        # presumably rewrites oldnode in place -- confirm against its definition.
        replace_node(oldnode, converted)

    for transform in (html.unescape, curly_quotes, clean_unicode):
        maptext(oldnode, transform)

    if oldnode.tag in title_tags:
        protect(oldnode)

Example #2

Show file

File: normalize_anth.py Project: xinru1414/acl-anthology

def normalize(oldnode, informat):
    """
    Normalizes the text of one XML field node. The steps applied are:
    - Unescaping HTML
    - Normalizing quotes (via `curly_quotes`)
    - Cleaning up unicode characters (via `clean_unicode`)
    If 'informat' is "latex", the node's text is first run through
    `latex_to_xml`; per the original author's note, these LaTeX operations
    are not idempotent.

    Nodes with a tag in the skip list below are left untouched, and
    'author'/'editor' nodes are handled by normalizing each of their children.
    'title' and 'booktitle' nodes are additionally passed to `protect`.
    """

    untouched = (
        "url", "href", "mrf", "doi", "bibtype", "bibkey", "revision",
        "erratum", "attachment", "paper", "presentation", "dataset",
        "software", "video",
    )
    titles = ("title", "booktitle")

    if oldnode.tag in untouched:
        return

    if oldnode.tag in ("author", "editor"):
        for oldchild in oldnode:
            normalize(oldchild, informat=informat)
        return

    if informat == "latex":
        if len(oldnode) > 0:
            # Child markup gets flattened by itertext() below; log which tags.
            tag_list = ", ".join(child.tag for child in oldnode)
            logging.error("field has child elements {}".format(tag_list))

        # Only title-like fields ask latex_to_xml for fixed-case handling.
        wants_fixed_case = oldnode.tag in titles
        plain = "".join(oldnode.itertext())
        newnode = latex_to_xml(plain, trivial_math=True, fixed_case=wants_fixed_case)
        newnode.tag = oldnode.tag
        newnode.attrib.update(oldnode.attrib)
        # NOTE(review): oldnode is still used afterwards, so replace_node
        # presumably updates it in place -- confirm against its definition.
        replace_node(oldnode, newnode)

    for mapper in (html.unescape, curly_quotes, clean_unicode):
        maptext(oldnode, mapper)

    if oldnode.tag in titles:
        protect(oldnode)