Python etree_replace_with_node_list Beispiele

Programmiersprache: Python

Namespace / Paketname: utils

Methode / Funktion: etree_replace_with_node_list

Beispiele auf hotexamples.com: 7

Python etree_replace_with_node_list – 7 Beispiele gefunden. Dies sind die am besten bewerteten Python-Beispiele für die Funktion utils.etree_replace_with_node_list, die aus Open-Source-Projekten extrahiert wurden. Sie können die Beispiele bewerten, um uns dabei zu helfen, ihre Qualität zu verbessern.

Beispiel #1

Datei anzeigen

Datei: shortcodehtml2html.py Projekt: therealmarv/emas.transforms

 def traverse(node):
     """Walk *node* and its descendants, unwrapping matching media objects.

     A match is a ``div`` element (tag prefixed with ``nsPrefix``) whose class
     is ``media`` and whose ``object`` child has more than two children, the
     first being a ``span`` of class ``cnx_label`` and the second an ``a``
     with an empty ``href``.  For a match, those two leading children are
     deleted and ``utils.etree_replace_with_node_list`` is called with the
     div as parent and the object as both the node to replace and the source
     of the replacement nodes.  Every child of *node* is then visited
     recursively, whether or not *node* itself matched.
     """
     mediaObject = None
     if node.tag == nsPrefix + 'div' and node.attrib.get('class') == 'media':
         mediaObject = node.find(nsPrefix + 'object')

     if mediaObject is not None and len(mediaObject) > 2:
         labelNode = mediaObject[0]
         linkNode = mediaObject[1]
         if (labelNode.tag == nsPrefix + 'span'
                 and labelNode.attrib.get('class') == 'cnx_label'
                 and linkNode.tag == nsPrefix + 'a'
                 and linkNode.attrib.get('href') == ''):
             # Drop the label span and the empty link, one after the other
             for _ in range(2):
                 del mediaObject[0]
             # Hand what is left of the object over to the helper
             import utils
             utils.etree_replace_with_node_list(node, mediaObject, mediaObject)

     for descendant in node:
         traverse(descendant)

Beispiel #2

Datei anzeigen

Datei: extract-teachers-guide.py Projekt: Siyavula/cnxml-validator

def traverse(iNode):
    """Prune the tree below *iNode*, keeping only teacher's-guide material.

    The node's tag (and, for some tags, its parent's tag) decides what
    happens:

    * ``document``, a ``content`` directly under ``document``, or a
      ``section`` whose parent is not ``activity``: every child is visited
      recursively and removed when its own call returns False.
    * ``title`` directly under ``section``: kept untouched.
    * ``teachers-guide``: passed to ``etree_replace_with_node_list`` with its
      parent, itself as node to replace and itself as replacement source.
    * ``exercises``: the same replacement is applied to every
      ``teachers-guide`` descendant found with XPath.
    * anything else: a warning plus the serialised node is written to
      stderr for each of ``exercises`` / ``teachers-guide`` it contains, and
      the node is reported as removable.

    A node without a parent (the tree root) is treated as having a parent
    tag of ``None`` so that calling this on an arbitrary root does not fail
    with ``AttributeError``.

    Args:
        iNode: element exposing ``tag``, ``getparent()``, ``remove()``,
            ``xpath()`` and iteration over its children (lxml-style).

    Returns:
        True when the caller should keep *iNode*, False when it should
        remove it.
    """
    parentNode = iNode.getparent()
    # The root has no parent; compare against None rather than dereference it
    parentTag = parentNode.tag if parentNode is not None else None
    nodeTag = iNode.tag

    isContainer = (
        (nodeTag == 'document')
        or ((nodeTag == 'content') and (parentTag == 'document'))
        or ((nodeTag == 'section') and (parentTag != 'activity'))
    )
    if isContainer:
        # Iterate over a snapshot: children are removed while walking
        for child in list(iNode):
            if not traverse(child):
                iNode.remove(child)
        return True

    if (nodeTag == 'title') and (parentTag == 'section'):
        return True

    if nodeTag == 'teachers-guide':
        etree_replace_with_node_list(parentNode, iNode, iNode, keepTail=True)
        return True

    if nodeTag == 'exercises':
        for tgNode in iNode.xpath('.//teachers-guide'):
            etree_replace_with_node_list(tgNode.getparent(), tgNode, tgNode, keepTail=True)
        return True

    # Node is about to be dropped by the caller: warn if that loses content
    for nestedTag in ['exercises', 'teachers-guide']:
        if len(iNode.xpath('.//' + nestedTag)) > 0:
            sys.stderr.write('WARNING: Deleting node that contains %s.\n' % nestedTag)
            sys.stderr.write(etree.tostring(iNode) + '\n')
    return False

Beispiel #3

Datei anzeigen

Datei: cnxmlplus2shortcodecnxml.py Projekt: Siyavula/emas.transforms

    def traverse_dom_for_cnxml(self, element):
        # traverse every element in tree, find matching environments, transform
        for child in element:
            self.traverse_dom_for_cnxml(child)

        childIndex = 0
        while childIndex < len(element):
            child = element[childIndex]

            if child.tag in ["video", "simulation", "presentation", "box"]:
                child.tag = "todo-" + child.tag
                childIndex += 1

            elif child.tag == "image":
                # <image> <arguments/> <src/> </image>
                mediaNode = utils.create_node("media")
                mediaNode.append(utils.create_node("image"))
                mediaNode.attrib["alt"] = "Image"
                urlNode = child.find("src")
                if urlNode is not None:
                    mediaNode[0].attrib["src"] = urlNode.text.strip()
                else:
                    mediaNode[0].attrib["src"] = ""
                mediaNode.tail = child.tail
                element[childIndex] = mediaNode
                childIndex += 1

            elif child.tag == "figure":
                typeNode = child.find("type")
                if typeNode is not None:
                    typ = typeNode.text.strip()
                    child.attrib["type"] = typ
                    typeNode.tag = "label"
                    typeNode.text = {"figure": "Figure", "table": "Table"}[typ]
                childIndex += 1

            elif child.tag == "caption":
                if (len(child) == 1) and (child[0].tag == "para"):
                    utils.etree_replace_with_node_list(child, child[0], child[0])
                childIndex += 1

            elif child.tag == "activity":
                # <activity type="activity"><title/> <section><title/>...</section> </activity>
                child.tag = "example"
                child.append(
                    utils.create_node(
                        "label",
                        text={
                            "g_experiment": "General experiment",
                            "f_experiment": "Formal experiment",
                            "i_experiment": "Informal experiment",
                            "activity": "Activity",
                            "Investigation": "Investigation",
                            "groupdiscussion": "Group discussion",
                            "casestudy": "Case study",
                            "project": "Project",
                        }[child.attrib["type"]],
                    )
                )
                pos = 1
                while pos < len(child):
                    if child[pos].tag == "section":
                        sectionNode = child[pos]
                        assert sectionNode[0].tag == "title"
                        del child[pos]
                        child.insert(pos, utils.create_node("para"))
                        child[pos].append(utils.create_node("emphasis", text=sectionNode[0].text.strip()))
                        child[pos][-1].attrib["effect"] = "bold"
                        pos += 1
                        sectionChildren = sectionNode.getchildren()
                        for i in range(1, len(sectionChildren)):
                            child.insert(pos, sectionChildren[i])
                            pos += 1
                    else:
                        pos += 1
                childIndex += 1

            elif child.tag == "worked_example":
                # <worked_example> <title/> <question/> <answer> ... <workstep> <title/> ... </workstep> </answer> </worked_example>
                child.tag = "example"
                newSubChildren = []
                newSubChildren.append(utils.create_node("label", text="Worked example"))
                pos = 1
                for subChild in child:
                    if subChild.tag == "title":
                        newSubChildren.append(subChild)
                    elif subChild.tag == "question":
                        newSubChildren.append(subChild)
                        subChild.tag = "section"
                        subChild.append(utils.create_node("title", text="Question"))
                    elif subChild.tag == "answer":
                        newSubChildren.append(subChild)
                        subChild.tag = "section"
                        subChild.append(utils.create_node("title", text="Answer"))
                        for x in subChild:
                            if x.tag == "workstep":
                                x.tag = "section"
                childIndex += 1

            elif child.tag == "note":
                child.insert(
                    0,
                    utils.create_node(
                        "label",
                        text={"warning": "Warning", "tip": "Tip", "note": "Note", "aside": "Interesting Fact"}.get(
                            child.attrib["type"], child.attrib["type"]
                        ),
                    ),
                )
                childIndex += 1

            elif child.tag == "math_identity":
                del element[childIndex]  # Remove math_identity from DOM, still available as child
                ruleNode = utils.create_node("rule")
                ruleNode.attrib["type"] = "Identity"
                child.tag = "statement"
                ruleNode.append(child)
                element.insert(childIndex, ruleNode)
                childIndex += 1

            elif child.tag == "nuclear_notation":
                namespace = "http://www.w3.org/1998/Math/MathML"
                mathNode = utils.create_node("math", namespace=namespace)
                mathNode.append(utils.create_node("msubsup", namespace=namespace))
                mathNode[-1].append(utils.create_node("mo", namespace=namespace, text=u"\u200b"))
                mathNode[-1].append(utils.create_node("mn", namespace=namespace, text=child.find("atomic_number").text))
                if child.find("mass_number") is not None:
                    massNumber = child.find("mass_number").text
                else:
                    massNumber = u"\u200b"
                mathNode[-1].append(utils.create_node("mn", namespace=namespace, text=massNumber))
                mathNode.append(utils.create_node("mtext", namespace=namespace, text=child.find("symbol").text))

                mathNode.tail = child.tail
                element[childIndex] = mathNode
                childIndex += 1

            elif child.tag == "math_extension":
                child.tag = "note"
                titleNode = child.find("title")
                if titleNode is not None:
                    titleNode.tag = "label"
                    titleNode.text = u"Extension \u2014 " + titleNode.text.strip()
                else:
                    child.insert(0, utils.create_node("label", text="Extension"))
                bodyNode = child.find("body")
                utils.etree_replace_with_node_list(child, bodyNode, bodyNode)
                childIndex += 1

            elif child.tag == "section":
                # Check that it is not an activity section
                if child.getparent().tag != "activity":
                    shortCodeNode = child.find("shortcode")
                    if shortCodeNode is None:
                        if (child.attrib.get("type") not in ["subsubsection", "subsubsubsection"]) and (
                            child.find("title").text.strip() != "Chapter summary"
                        ):
                            print 'WARNING: no shortcode for section "%s"' % child.find("title").text.strip()
                            shortcode = "SHORTCODE"
                        else:
                            shortcode = None
                    else:
                        if (child.attrib.get("type") in ["subsubsection", "subsubsubsection"]) or (
                            child.find("title").text.strip() == "Chapter summary"
                        ):
                            print 'WARNING: section "%s" should not have a shortcode' % child.find("title").text.strip()
                        shortcode = shortCodeNode.text.strip()
                        child.remove(shortCodeNode)
                    """ # Commented out so that shortcodes do not get displayed
                    if shortcode is not None:
                        titleNode = child.find('title')
                        if len(titleNode) == 0:
                            if titleNode.text is None:
                                titleNode.text = ''
                            titleNode.text += ' [' + shortcode + ']'
                        else:
                            if titleNode[-1].tail is None:
                                titleNode[-1].tail = ''
                            titleNode[-1].tail += ' [' + shortcode + ']'
                    """
                childIndex += 1

            elif child.tag == "latex":
                if child.attrib.get("display", "inline") == "block":
                    delimiters = "[]"
                else:
                    delimiters = "()"
                if child.text is None:
                    child.text = ""
                child.text = "\\" + delimiters[0] + child.text
                if len(child) > 0:
                    if child[-1].text is None:
                        child[-1].tail = ""
                    child[-1].tail += "\\" + delimiters[1]
                else:
                    child.text += "\\" + delimiters[1]
                utils.etree_replace_with_node_list(element, child, child)
                childIndex += len(child)

            elif child.tag in ["chem_compound", "spec_note"]:
                assert len(child) == 0, "<chem_compound> element not expected to have sub-elements."
                if child.text is None:
                    child.text = ""
                child.text = child.text.strip()
                assert child.text != "", "<chem_compound> element must contain text."

                compoundText = child.text
                pos = 0
                textOpen = False
                while pos < len(compoundText):
                    if "a" <= compoundText[pos].lower() <= "z":
                        if not textOpen:
                            compoundText = compoundText[:pos] + r"\text{" + compoundText[pos:]
                            textOpen = True
                            pos += len(r"\text{") + 1
                        else:
                            pos += 1
                    else:
                        if textOpen:
                            compoundText = compoundText[:pos] + "}" + compoundText[pos:]
                            textOpen = False
                            pos += 2
                        else:
                            pos += 1
                if textOpen:
                    compoundText += "}"
                compoundXml = utils.xmlify(r"\(" + compoundText + r"\)")

                compoundDom = etree.fromstring(compoundXml[compoundXml.find("<formula ") : compoundXml.rfind("\n</p>")])
                utils.etree_replace_with_node_list(element, child, compoundDom)
                childIndex += len(child)

            else:
                path = [child.tag]
                node = child
                while True:
                    node = node.getparent()
                    if node is None:
                        break
                    path.append(node.tag)
                path.reverse()

                namespaces = {"m": "http://www.w3.org/1998/Math/MathML"}
                valid = [
                    "emphasis",
                    "para",
                    "figure/type",
                    "exercise/problem",
                    "exercise/title",
                    "exercise/shortcodes/entry/number",
                    "exercise/shortcodes/entry/shortcode",
                    "exercise/shortcodes/entry/url",
                    "exercise/shortcodes/entry/todo-content",
                    "list/item/label",
                    "table/tgroup/tbody/row/entry",
                    "table/tgroup/colspec",
                    "definition/term",
                    "definition/meaning",
                    "sup",
                    "sub",
                    "m:mn",
                    "m:mo",
                    "m:mi",
                    "m:msup",
                    "m:mrow",
                    "m:math",
                    "m:mtable",
                    "m:mtr",
                    "m:mtd",
                    "m:msub",
                    "m:mfrac",
                    "m:msqrt",
                    "m:mspace",
                    "m:mstyle",
                    "m:mfenced",
                    "m:mtext",
                    "m:mroot",
                    "m:mref",
                    "m:msubsup",
                    "m:munderover",
                    "m:munder",
                    "m:mover",
                    "m:mphantom",
                    "equation",
                    "link",
                    "quote",
                    "rule/title",
                    "rule/statement",
                    "rule/proof",
                    "section/title",
                    "section/shortcode",
                    "image/arguments",
                    "image/src",
                    "number/coeff",
                    "number/exp",
                    "number/base",
                    "nuclear_notation/mass_number",
                    "nuclear_notation/atomic_number",
                    "nuclear_notation/symbol",
                    "pspicture/code",
                    "pspicture/usepackage",
                    "tikzpicture/code",
                    "video/title",
                    "video/shortcode",
                    "video/url",
                    "video/width",
                    "video/height",
                    "worked_example/answer/workstep/title",
                    "worked_example/question",
                    "worked_example/title",
                    "activity/title",
                    "math_extension/title",
                    "math_extension/body",
                    "math_identity",
                    "document/content/title",
                    "document/content/content",
                    "simulation/title",
                    "simulation/shortcode",
                    "simulation/url",
                    "simulation/width",
                    "simulation/height",
                    "simulation/embed",
                    "presentation/title",
                    "presentation/url",
                    "presentation/shortcode",
                    "presentation/embed",
                    "box",
                ]
                validSet = set([])
                for entry in valid:
                    entry = entry.split("/")
                    for i in range(len(entry)):
                        if ":" in entry[i]:
                            entry[i] = entry[i].split(":")
                            assert len(entry[i]) == 2
                            entry[i] = "{%s}%s" % (namespaces[entry[i][0]], entry[i][1])
                        validSet.add(tuple(entry[: i + 1]))
                valid = validSet

                passed = False
                for entry in valid:
                    if tuple(path[-len(entry) :]) == entry:
                        passed = True
                        break
                if not passed:
                    path = "/".join(path)
                    for key, url in namespaces.iteritems():
                        path = path.replace("{%s}" % url, key + ":")
                    LOGGER.info("Unhandled element: " + path)

                childIndex += 1

Beispiel #4

Datei anzeigen

Datei: cnxmlplus2shortcodecnxml.py Projekt: Siyavula/emas.transforms

    def transform(self, dom):
        # Currency
        for currencyNode in dom.xpath("//currency"):
            latexMode = utils.etree_in_context(currencyNode, "latex")
            symbolNode = currencyNode.find("symbol")
            if symbolNode is None:
                symbol = "R"
                symbolLocation = "front"
            else:
                symbol = symbolNode.text.strip()
                symbolLocation = symbolNode.attrib.get("location", "front")
            numberNode = currencyNode.find("number")
            if numberNode.text is None:
                numberNode.text = ""
            # Set default precision to 0 if number is an int, and to 2 if it is a float
            try:
                int(numberNode.text.strip())
                defaultPrecision = 0
            except ValueError:
                defaultPrecision = 2
            currencyPrecision = int(currencyNode.attrib.get("precision", defaultPrecision))
            numberNode.text = ("%%.%if" % currencyPrecision) % float(numberNode.text.strip())

            replacementNode = etree.Element("dummy")
            if symbolLocation == "front":
                if latexMode:
                    replacementNode.text = r"\text{" + symbol + " }"
                else:
                    replacementNode.text = symbol + u"\u00a0"
                replacementNode.append(numberNode)
            else:
                replacementNode.append(numberNode)
                if latexMode:
                    replacementNode.tail = r"\text{ " + symbol + "}"
                else:
                    replacementNode.tail = u"\u00a0" + symbol
            utils.etree_replace_with_node_list(currencyNode.getparent(), currencyNode, replacementNode)

        # Percentage
        for percentageNode in dom.xpath("//percentage"):
            latexMode = utils.etree_in_context(percentageNode, "latex")
            percentageNode.tag = "number"
            if percentageNode.tail is None:
                percentageNode.tail = ""
            if latexMode:
                percentageNode.tail = r"\%" + percentageNode.tail
            else:
                percentageNode.tail = "%" + percentageNode.tail

        # United numbers: ensure that units follow numbers
        for node in dom.xpath("//unit_number"):
            if (len(node) == 2) and (node[0].tag == "unit") and (node[1].tag == "number"):
                unitNode = node[0]
                numberNode = node[1]
                del node[0]
                del node[0]
                node.append(numberNode)
                node.append(unitNode)

        # Numbers
        for numberNode in dom.xpath("//number"):
            # Avoid shortcode exercise numbers
            if (numberNode.getparent().tag == "entry") and (numberNode.getparent().getparent().tag == "shortcodes"):
                continue
            latexMode = utils.etree_in_context(numberNode, "latex")
            if (len(numberNode) == 0) and ("e" in numberNode.text):
                # Number in exponential notation: convert to <coeff> and <exp>
                numberText = numberNode.text
                float(numberText)  # Check that it is really a float
                numberNode.text = None
                numberNode.append(etree.Element("coeff"))
                pos = numberText.find("e")
                numberNode[-1].text = numberText[:pos]
                numberNode.append(etree.Element("exp"))
                numberNode[-1].text = str(int(numberText[pos + 1 :]))

            if len(numberNode) == 0:
                # No children, means it's just a plain number
                coeffText = utils.format_number(numberNode.text.strip())
                try:
                    if latexMode:
                        dummyNode = etree.fromstring(r"<dummy>\text{" + coeffText + "}</dummy>")
                    else:
                        dummyNode = etree.fromstring("<dummy>" + coeffText + "</dummy>")
                except etree.XMLSyntaxError, msg:
                    print repr(coeffText)
                    raise etree.XMLSyntaxError, msg
            else:
                # Scientific or exponential notation: parse out coefficient, base and exponent
                coeffNode = numberNode.find("coeff")
                expNode = numberNode.find("exp")
                baseNode = numberNode.find("base")
                if coeffNode is None:
                    # Exponential
                    if baseNode is None:
                        baseText = utils.format_number("10")
                    else:
                        baseText = utils.format_number(baseNode.text.strip())
                    assert expNode is not None, etree.tostring(numberNode)
                    expText = utils.format_number(expNode.text.strip())
                    if latexMode:
                        dummyNode = etree.fromstring(
                            r"<dummy>\text{" + baseText + r"}^{\text{" + expText + r"}}</dummy>"
                        )
                    else:
                        dummyNode = etree.fromstring("<dummy>" + baseText + "<sup>" + expText + "</sup></dummy>")
                else:
                    # Scientific notation or plain number (<coeff> only)
                    coeffText = utils.format_number(coeffNode.text.strip())
                    if expNode is None:
                        assert baseNode is None
                        try:
                            if latexMode:
                                dummyNode = etree.fromstring(r"<dummy>\text{" + coeffText + "}</dummy>")
                            else:
                                dummyNode = etree.fromstring("<dummy>" + coeffText + "</dummy>")
                        except etree.XMLSyntaxError, msg:
                            print repr(coeffText)
                            raise etree.XMLSyntaxError, msg
                    else:
                        if baseNode is None:

Beispiel #5

Datei anzeigen

Datei: cnxmlplus2shortcodecnxml.py Projekt: Siyavula/emas.transforms

                        expText = utils.format_number(expNode.text.strip())
                        if latexMode:
                            dummyNode = etree.fromstring(
                                r"<dummy>\text{"
                                + coeffText
                                + r" } &#215; \text{ "
                                + baseText
                                + r"}^{\text{"
                                + expText
                                + r"}}</dummy>"
                            )
                        else:
                            dummyNode = etree.fromstring(
                                "<dummy>" + coeffText + " &#215; " + baseText + "<sup>" + expText + "</sup></dummy>"
                            )
            utils.etree_replace_with_node_list(numberNode.getparent(), numberNode, dummyNode)

        # Units
        for unitNode in dom.xpath("//unit"):
            latexMode = utils.etree_in_context(unitNode, "latex")
            if unitNode.text is None:
                unitNode.text = ""
            unitNode.text = unitNode.text.lstrip()
            if latexMode:
                unitNode.text = r"\text{" + unitNode.text
            if len(unitNode) == 0:
                unitNode.text = unitNode.text.rstrip()
                if latexMode:
                    unitNode.text += "}"
            else:
                if unitNode[-1].tail is None:

Beispiel #6

Datei anzeigen

Datei: cnxmlplus2shortcodecnxml_standalone.py Projekt: therealmarv/emas.transforms

    def traverse_dom_for_cnxml(self, element):
        # traverse every element in tree, find matching environments, transform
        for child in element:
            self.traverse_dom_for_cnxml(child)

        childIndex = 0
        while childIndex < len(element):
            child = element[childIndex]

            if child.tag in ['video', 'simulation', 'presentation', 'box']:
                child.tag = 'todo-' + child.tag
                childIndex += 1

            elif child.tag == 'image':
                # <image> <arguments/> <src/> </image>
                mediaNode = utils.create_node('media')
                mediaNode.append(utils.create_node('image'))
                mediaNode.attrib['alt'] = 'Image'
                urlNode = child.find('src')
                if urlNode is not None:
                    mediaNode[0].attrib['src'] = urlNode.text.strip()
                else:
                    mediaNode[0].attrib['src'] = ''
                mediaNode.tail = child.tail
                element[childIndex] = mediaNode
                childIndex += 1

            elif child.tag == 'figure':
                typeNode = child.find('type')
                if typeNode is not None:
                    typ = typeNode.text.strip()
                    child.attrib['type'] = typ
                    typeNode.tag = 'label'
                    typeNode.text = {'figure': 'Figure', 'table': 'Table'}[typ]
                childIndex += 1

            elif child.tag == 'caption':
                if (len(child) == 1) and (child[0].tag == 'para'):
                    utils.etree_replace_with_node_list(child, child[0], child[0])
                childIndex += 1

            elif child.tag == 'activity':
                # <activity type="activity"><title/> <section><title/>...</section> </activity>
                child.tag = 'example'
                child.append(utils.create_node('label', text={
                    'g_experiment': 'General experiment',
                    'f_experiment': 'Formal experiment',
                    'i_experiment': 'Informal experiment',
                    'activity': 'Activity',
                    'Investigation': 'Investigation',
                    'groupdiscussion': 'Group discussion',
                    'casestudy': 'Case study',
                    'project': 'Project'}[child.attrib['type']]))
                pos = 1
                while pos < len(child):
                    if child[pos].tag == 'section':
                        sectionNode = child[pos]
                        assert sectionNode[0].tag == 'title'
                        del child[pos]
                        child.insert(pos, utils.create_node('para'))
                        child[pos].append(utils.create_node('emphasis', text=sectionNode[0].text.strip()))
                        child[pos][-1].attrib['effect'] = 'bold'
                        pos += 1
                        sectionChildren = sectionNode.getchildren()
                        for i in range(1, len(sectionChildren)):
                            child.insert(pos, sectionChildren[i])
                            pos += 1
                    else:
                        pos += 1
                childIndex += 1

            elif child.tag == 'worked_example':
                # <worked_example> <title/> <question/> <answer> ... <workstep> <title/> ... </workstep> </answer> </worked_example>
                child.tag = 'example'
                newSubChildren = []
                newSubChildren.append(utils.create_node('label', text="Worked example"))
                pos = 1
                for subChild in child:
                    if subChild.tag == 'title':
                        newSubChildren.append(subChild)
                    elif subChild.tag == 'question':
                        newSubChildren.append(subChild)
                        subChild.tag = 'section'
                        subChild.append(utils.create_node('title', text='Question'))
                    elif subChild.tag == 'answer':
                        newSubChildren.append(subChild)
                        subChild.tag = 'section'
                        subChild.append(utils.create_node('title', text='Answer'))
                        for x in subChild:
                            if x.tag == 'workstep':
                                x.tag = 'section'
                childIndex += 1

            elif child.tag == 'note':
                child.insert(0, utils.create_node('label', text={
                    'warning': 'Warning',
                    'tip': 'Tip',
                    'note': 'Note',
                    'aside': 'Interesting Fact'}.get(child.attrib['type'], child.attrib['type'])))
                childIndex += 1

            elif child.tag == 'math_identity':
                del element[childIndex] # Remove math_identity from DOM, still available as child
                ruleNode = utils.create_node('rule')
                ruleNode.attrib['type'] = 'Identity'
                child.tag = 'statement'
                ruleNode.append(child)
                element.insert(childIndex, ruleNode)
                childIndex += 1

            elif child.tag == 'nuclear_notation':
                namespace = 'http://www.w3.org/1998/Math/MathML'
                mathNode = utils.create_node('math', namespace=namespace)
                mathNode.append(utils.create_node('msubsup', namespace=namespace))
                mathNode[-1].append(utils.create_node('mo', namespace=namespace, text=u'\u200b'))
                mathNode[-1].append(utils.create_node('mn', namespace=namespace, text=child.find('atomic_number').text))
                if child.find('mass_number') is not None:
                    massNumber = child.find('mass_number').text
                else:
                    massNumber = u'\u200b'
                mathNode[-1].append(utils.create_node('mn', namespace=namespace, text=massNumber))
                mathNode.append(utils.create_node('mtext', namespace=namespace, text=child.find('symbol').text))

                mathNode.tail = child.tail
                element[childIndex] = mathNode
                childIndex += 1

            elif child.tag == 'math_extension':
                child.tag = 'note'
                titleNode = child.find('title')
                if titleNode is not None:
                    titleNode.tag = 'label'
                    titleNode.text = u'Extension \u2014 ' + titleNode.text.strip()
                else:
                    child.insert(0, utils.create_node('label', text='Extension'))
                bodyNode = child.find('body')
                utils.etree_replace_with_node_list(child, bodyNode, bodyNode)
                childIndex += 1

            elif child.tag == 'section':
                # Check that it is not an activity section
                if child.getparent().tag != 'activity':
                    shortCodeNode = child.find('shortcode')
                    if shortCodeNode is None:
                        if (child.attrib.get('type') not in ['subsubsection', 'subsubsubsection']) and (child.find('title').text.strip() != 'Chapter summary'):
                            print 'WARNING: no shortcode for section "%s"'%child.find('title').text.strip()
                            shortcode = 'SHORTCODE'
                        else:
                            shortcode = None
                    else:
                        if (child.attrib.get('type') in ['subsubsection', 'subsubsubsection']) or (child.find('title').text.strip() == 'Chapter summary'):
                            print 'WARNING: section "%s" should not have a shortcode'%child.find('title').text.strip()
                        shortcode = shortCodeNode.text.strip()
                        child.remove(shortCodeNode)
                    """ # Commented out so that shortcodes do not get displayed
                    if shortcode is not None:
                        titleNode = child.find('title')
                        if len(titleNode) == 0:
                            if titleNode.text is None:
                                titleNode.text = ''
                            titleNode.text += ' [' + shortcode + ']'
                        else:
                            if titleNode[-1].tail is None:
                                titleNode[-1].tail = ''
                            titleNode[-1].tail += ' [' + shortcode + ']'
                    """
                childIndex += 1

            elif child.tag == 'latex':
                if child.attrib.get('display', 'inline') == 'block':
                    delimiters = '[]'
                else:
                    delimiters = '()'
                if child.text is None:
                    child.text = ''
                child.text = '\\' + delimiters[0] + child.text
                if len(child) > 0:
                    if child[-1].text is None:
                        child[-1].tail = ''
                    child[-1].tail += '\\' + delimiters[1]
                else:
                    child.text += '\\' + delimiters[1]
                utils.etree_replace_with_node_list(element, child, child)
                childIndex += len(child)

            elif child.tag in ['chem_compound', 'spec_note']:
                assert len(child) == 0, "<chem_compound> element not expected to have sub-elements."
                if child.text is None:
                    child.text = ''
                child.text = child.text.strip()
                assert child.text != '', "<chem_compound> element must contain text."

                compoundText = child.text
                pos = 0
                textOpen = False
                while pos < len(compoundText):
                    if 'a' <= compoundText[pos].lower() <= 'z':
                        if not textOpen:
                            compoundText = compoundText[:pos] + r'\text{' + compoundText[pos:]
                            textOpen = True
                            pos += len(r'\text{') + 1
                        else:
                            pos += 1
                    else:
                        if textOpen:
                            compoundText = compoundText[:pos] + '}' + compoundText[pos:]
                            textOpen = False
                            pos += 2
                        else:
                            pos += 1
                if textOpen:
                    compoundText += '}'
                compoundXml = utils.xmlify(r'\(' + compoundText + r'\)')

                compoundDom = etree.fromstring(compoundXml[compoundXml.find('<formula '):compoundXml.rfind('\n</p>')])
                utils.etree_replace_with_node_list(element, child, compoundDom)
                childIndex += len(child)

            else:
                path = [child.tag]
                node = child
                while True:
                    node = node.getparent()
                    if node is None:
                        break
                    path.append(node.tag)
                path.reverse()

                namespaces = {'m': 'http://www.w3.org/1998/Math/MathML'}
                valid = [
                    'emphasis',
                    'para',
                    'figure/type',
                    'exercise/problem', 'exercise/title',
                    'exercise/shortcodes/entry/number', 'exercise/shortcodes/entry/shortcode', 'exercise/shortcodes/entry/url', 'exercise/shortcodes/entry/todo-content',
                    'list/item/label',
                    'table/tgroup/tbody/row/entry',
                    'table/tgroup/colspec',
                    'definition/term', 'definition/meaning',
                    'sup',
                    'sub',
                    'm:mn', 'm:mo', 'm:mi', 'm:msup', 'm:mrow', 'm:math', 'm:mtable', 'm:mtr', 'm:mtd', 'm:msub', 'm:mfrac', 'm:msqrt', 'm:mspace', 'm:mstyle', 'm:mfenced', 'm:mtext', 'm:mroot', 'm:mref', 'm:msubsup', 'm:munderover', 'm:munder', 'm:mover', 'm:mphantom',
                    'equation',
                    'link',
                    'quote',
                    'rule/title', 'rule/statement', 'rule/proof',

                    'section/title',
                    'section/shortcode',
                    'image/arguments',
                    'image/src',
                    'number/coeff', 'number/exp', 'number/base',
                    'nuclear_notation/mass_number', 'nuclear_notation/atomic_number', 'nuclear_notation/symbol',
                    'pspicture/code', 'pspicture/usepackage',
                    'tikzpicture/code',
                    'video/title', 'video/shortcode', 'video/url', 'video/width', 'video/height',
                    'worked_example/answer/workstep/title', 'worked_example/question', 'worked_example/title',
                    'activity/title',
                    'math_extension/title',
                    'math_extension/body',
                    'math_identity',
                    'document/content/title',
                    'document/content/content',
                    'simulation/title', 'simulation/shortcode', 'simulation/url', 'simulation/width', 'simulation/height', 'simulation/embed',
                    'presentation/title', 'presentation/url', 'presentation/shortcode', 'presentation/embed',
                    'box',
                ]
                validSet = set([])
                for entry in valid:
                    entry = entry.split('/')
                    for i in range(len(entry)):
                        if ':' in entry[i]:
                            entry[i] = entry[i].split(':')
                            assert len(entry[i]) == 2
                            entry[i] = '{%s}%s'%(namespaces[entry[i][0]], entry[i][1])
                        validSet.add(tuple(entry[:i+1]))
                valid = validSet

                passed = False
                for entry in valid:
                    if tuple(path[-len(entry):]) == entry:
                        passed = True
                        break
                if not passed:
                    path = '/'.join(path)
                    for key, url in namespaces.iteritems():
                        path = path.replace('{%s}'%url, key+':')
                    LOGGER.info('Unhandled element: ' + path)

                childIndex += 1

Beispiel #7

Datei anzeigen

Datei: cnxmlplus2shortcodecnxml_standalone.py Projekt: therealmarv/emas.transforms

    def transform(self, dom):
        # Currency
        for currencyNode in dom.xpath('//currency'):
            latexMode = utils.etree_in_context(currencyNode, 'latex')
            symbolNode = currencyNode.find('symbol')
            if symbolNode is None:
                symbol = 'R'
                symbolLocation = 'front'
            else:
                symbol = symbolNode.text.strip()
                symbolLocation = symbolNode.attrib.get('location', 'front')
            numberNode = currencyNode.find('number')
            if numberNode.text is None:
                numberNode.text = ''
            # Set default precision to 0 if number is an int, and to 2 if it is a float
            try:
                int(numberNode.text.strip())
                defaultPrecision = 0
            except ValueError:
                defaultPrecision = 2
            currencyPrecision = int(currencyNode.attrib.get('precision', defaultPrecision))
            numberNode.text = ("%%.%if"%currencyPrecision)%float(numberNode.text.strip())

            replacementNode = etree.Element('dummy')
            if symbolLocation == 'front':
                if latexMode:
                    replacementNode.text = r'\text{' + symbol + ' }'
                else:
                    replacementNode.text = symbol + u'\u00a0'
                replacementNode.append(numberNode)
            else:
                replacementNode.append(numberNode)
                if latexMode:
                    replacementNode.tail = r'\text{ ' + symbol + '}'
                else:
                    replacementNode.tail = u'\u00a0' + symbol
            utils.etree_replace_with_node_list(currencyNode.getparent(), currencyNode, replacementNode)

        # Percentage
        for percentageNode in dom.xpath('//percentage'):
            latexMode = utils.etree_in_context(percentageNode, 'latex')
            percentageNode.tag = 'number'
            if percentageNode.tail is None:
                percentageNode.tail = ''
            if latexMode:
                percentageNode.tail = r'\%' + percentageNode.tail
            else:
                percentageNode.tail = '%' + percentageNode.tail

        # United numbers: ensure that units follow numbers
        for node in dom.xpath('//unit_number'):
            if (len(node) == 2) and (node[0].tag == 'unit') and (node[1].tag == 'number'):
                unitNode = node[0]
                numberNode = node[1]
                del node[0]
                del node[0]
                node.append(numberNode)
                node.append(unitNode)

        # Numbers
        for numberNode in dom.xpath('//number'):
            # Avoid shortcode exercise numbers
            if (numberNode.getparent().tag == 'entry') and (numberNode.getparent().getparent().tag == 'shortcodes'):
                continue
            latexMode = utils.etree_in_context(numberNode, 'latex')
            if (len(numberNode) == 0) and ('e' in numberNode.text):
                # Number in exponential notation: convert to <coeff> and <exp>
                numberText = numberNode.text
                float(numberText) # Check that it is really a float
                numberNode.text = None
                numberNode.append(etree.Element('coeff'))
                pos = numberText.find('e')
                numberNode[-1].text = numberText[:pos]
                numberNode.append(etree.Element('exp'))
                numberNode[-1].text = str(int(numberText[pos+1:]))

            if len(numberNode) == 0:
                # No children, means it's just a plain number
                coeffText = utils.format_number(numberNode.text.strip())
                try:
                    if latexMode:
                        dummyNode = etree.fromstring(r'<dummy>\text{' + coeffText + '}</dummy>')
                    else:
                        dummyNode = etree.fromstring('<dummy>' + coeffText + '</dummy>')
                except etree.XMLSyntaxError, msg:
                    print repr(coeffText)
                    raise etree.XMLSyntaxError, msg
            else:
                # Scientific or exponential notation: parse out coefficient, base and exponent
                coeffNode = numberNode.find('coeff')
                expNode = numberNode.find('exp')
                baseNode = numberNode.find('base')
                if coeffNode is None:
                    # Exponential
                    if baseNode is None:
                        baseText = utils.format_number('10')
                    else:
                        baseText = utils.format_number(baseNode.text.strip())
                    assert expNode is not None, etree.tostring(numberNode)
                    expText = utils.format_number(expNode.text.strip())
                    if latexMode:
                        dummyNode = etree.fromstring(r'<dummy>\text{' + baseText + r'}^{\text{' + expText + r'}}</dummy>')
                    else:
                        dummyNode = etree.fromstring('<dummy>' + baseText + '<sup>' + expText + '</sup></dummy>')
                else:
                    # Scientific notation or plain number (<coeff> only)
                    coeffText = utils.format_number(coeffNode.text.strip())
                    if expNode is None:
                        assert baseNode is None
                        try:
                            if latexMode:
                                dummyNode = etree.fromstring(r'<dummy>\text{' + coeffText + '}</dummy>')
                            else:
                                dummyNode = etree.fromstring('<dummy>' + coeffText + '</dummy>')
                        except etree.XMLSyntaxError, msg:
                            print repr(coeffText)
                            raise etree.XMLSyntaxError, msg
                    else:
                        if baseNode is None: