Ejemplo n.º 1
0
    def populate_xml(xml_element: _Element, value: Any, *, locals: Optional[dict] = None) -> None:
        """Fill ``xml_element`` from the entries of a ``dict`` value.

        Every entry whose value is not ``None`` produces a child element built with
        ``E``; its tag is the key passed through ``f.format``. The child is populated
        recursively. A child whose tag starts with ``_`` and that has no sub-elements
        is written as an attribute of ``xml_element`` (leading underscore dropped)
        instead of being appended. Values that are not dicts are not handled here.

        :param xml_element: element receiving the attributes / child elements
        :param value: data to serialise; only ``dict`` instances are processed
        :param locals: mapping forwarded as ``l=`` to each ``f.format`` call
        """
        if isinstance(value, dict):
            markup_flags = re.DOTALL | re.IGNORECASE

            def steam_to_xml(text: str) -> str:
                """Rewrite bracket-style markup in *text* as angle-bracket tags."""
                # [url=...]...[/url] and [u]...[/u] use dedicated templates from ``g``.
                converted = re.sub(
                    r'\[url=(.*?)](.*?)\[/url]',
                    f.format(g.xml_url_format, l={'url': r'\1', 'text': r'\2'}),
                    text,
                    flags=markup_flags,
                )
                converted = re.sub(
                    r'\[u](.*?)\[/u]',
                    f.format(g.xml_u_format, l={'text': r'\1'}),
                    converted,
                    flags=markup_flags,
                )

                # Images are dropped; an image sitting between two newlines
                # collapses to a single newline.
                converted = re.sub(r'\n\[img](.*?)\[/img]\n', '\n', converted, flags=markup_flags)
                converted = re.sub(r'\[img](.*?)\[/img]', '', converted, flags=markup_flags)

                # Convert the remaining [tag]...[/tag] pairs, repeating until a pass
                # replaces nothing so that nested pairs are converted as well.
                while True:
                    converted, replaced = re.subn(
                        r'\[(\w+)(=\w+)?](.*?)\[/\1]', r'<\1\2>\3</\1>', converted, flags=re.DOTALL
                    )
                    if replaced == 0:
                        break
                return converted

            for key, item in value.items():
                if item is None:  # optional entry, nothing to emit
                    continue
                tag = f.format(key, l=locals)
                child = E(tag)
                populate_xml(child, item, locals=locals)

                if isinstance(item, Steam2Xml):
                    # noinspection PyTypeChecker
                    child.text = etree.CDATA(steam_to_xml(child.text))

                if tag.startswith('_') and len(child) == 0:
                    # Leaf entry with an underscore-prefixed tag: emit as attribute.
                    xml_element.set(tag[1:], f.format(str(item), l=locals))
                else:
                    xml_element.append(child)
Ejemplo n.º 2
0
    def render(
        self,
        node: etree._Element,
        value: typing.Union[list, dict, CompoundValue],
        xsd_type: "ComplexType" = None,
        render_path=None,
    ) -> None:
        """Write ``value`` onto ``node`` as text, or mark the node as nil.

        :param node: element to update
        :param value: value to serialise; the ``Nil`` sentinel sets the
            ``xsi_ns("nil")`` attribute to ``"true"`` instead of writing text
        :param xsd_type: must be ``None`` (enforced with an assertion)
        :param render_path: accepted but not used by this implementation

        """
        assert xsd_type is None

        if value is not Nil:
            node.text = self.xmlvalue(value)
        else:
            node.set(xsi_ns("nil"), "true")
Ejemplo n.º 3
0
 def wrap_tei_node_in_ancestors(self, node: etree._Element, wrapped_node: etree._Element):
     """Rebuild the chain of (non-technical) ancestors of a tei node around a copy of it.

     Works from the inside out: each call wraps ``wrapped_node`` in a fresh element
     carrying the local name and attributes of the nearest remaining ancestor of
     ``node``, then recurses from that ancestor. ``tei:TEI`` and
     ``tei:text[@type = "work_part"]`` elements are excluded from the ancestors.
     @param node: the node in the original tree, used to navigate towards the root
     @param wrapped_node: a copy of the original node or its current wrapping
     @return: the outermost wrapping element, with an ``xmlns`` attribute set to the tei namespace
     """
     kept_ancestors = node.xpath(
         'ancestor::*[not(self::tei:TEI or self::tei:text[@type = "work_part"])]',
         namespaces=xml_ns,
     )
     if len(kept_ancestors) == 0:
         # nothing left to wrap in: declare the tei namespace on the root element
         wrapped_node.set('xmlns', xml_ns['tei'])
         return wrapped_node

     nearest = kept_ancestors[-1]  # results are in document order, so the last is the closest
     shell = etree.Element(etree.QName(nearest).localname)
     copy_attributes(nearest, shell)
     shell.append(wrapped_node)
     return self.wrap_tei_node_in_ancestors(nearest, shell)
def parse(
    node: etree._Element,
    all_tokens: list,
    n_prev_tokens=0,
):
    """Tokenize the text below ``node`` and record token offsets on each element.

    The direct text nodes and child elements of ``node`` are visited in the order
    returned by the XPath ``./text()|*``. Text is tokenized with
    ``annotator.tokenize`` (after replacing ``&amp;`` with ``&``) and the
    flattened tokens are appended to ``all_tokens``; child elements are handled
    recursively. Afterwards ``node`` gets ``start_pos`` / ``end_pos`` attributes
    (as strings) delimiting the tokens found inside it.

    :param node: element to process; it and its descendants are modified in place
    :param all_tokens: list extended in place with every token found
    :param n_prev_tokens: number of tokens that precede ``node``
    :return: number of tokens found inside ``node``
    """
    n_tokens = 0
    for item in node.xpath("./text()|*"):
        if isinstance(item, etree._ElementUnicodeResult):
            text = str(item).replace('&amp;', '&')
            # ``annotator.tokenize`` yields groups of tokens; flatten them in one
            # pass (``sum(groups, [])`` copies the growing list for every group).
            tokens = [token for group in annotator.tokenize(text) for token in group]
            all_tokens.extend(tokens)
            n_tokens += len(tokens)
        else:
            n_tokens += parse(item,
                              all_tokens=all_tokens,
                              n_prev_tokens=n_prev_tokens + n_tokens)

    node.set('start_pos', str(n_prev_tokens))
    node.set('end_pos', str(n_prev_tokens + n_tokens))

    return n_tokens
Ejemplo n.º 5
0
    def render(
        self,
        node: etree._Element,
        value: typing.Union[list, dict, CompoundValue],
        xsd_type: "ComplexType" = None,
        render_path=None,
    ) -> None:
        """Render ``value`` onto ``node``, dispatching on the kind of value.

        * ``AnyObject`` without an ``xsd_type``: the node is marked nil.
        * ``AnyObject`` with an ``xsd_type``: that type renders ``value.value`` and
          its ``qname`` is stored in the ``xsi_ns("type")`` attribute.
        * ``CompoundValue``: its ``_xsd_elm`` renders the value and supplies the
          ``qname`` stored in the ``xsi_ns("type")`` attribute.
        * anything else: written as node text via ``self.xmlvalue``.

        :param node: element to update
        :param value: value to serialise
        :param xsd_type: must be ``None`` (enforced with an assertion)
        :param render_path: forwarded to the delegated ``render`` calls

        """
        assert xsd_type is None

        if isinstance(value, AnyObject):
            if value.xsd_type is not None:
                value.xsd_type.render(node, value.value, None, render_path)
                node.set(xsi_ns("type"), value.xsd_type.qname)
            else:
                node.set(xsi_ns("nil"), "true")
            return

        if isinstance(value, CompoundValue):
            value._xsd_elm.render(node, value, render_path)
            node.set(xsi_ns("type"), value._xsd_elm.qname)
            return

        node.text = self.xmlvalue(value)
Ejemplo n.º 6
0
    def render(
        self,
        node: etree._Element,
        value: typing.Union[list, dict, CompoundValue],
        xsd_type: "ComplexType" = None,
        render_path=None,
    ) -> None:
        """Serialize the given value as attributes and subelements on ``node``.

        :param node: element that receives the rendered attributes and children
        :param value: data to render; ``None`` is treated as an empty dict and an
            ``ArrayValue`` is first converted with ``as_value_object()``
        :param xsd_type: when given, its ``_xsd_name`` / ``qname`` is written to
            the ``xsi_ns("type")`` attribute of ``node`` (``qname`` wins when both
            are set)
        :param render_path: list of names leading to this node; defaults to
            ``[self.name]``

        """
        if not render_path:
            render_path = [self.name]

        if not self.elements_nested and not self.attributes:
            return

        # TODO: Implement test case for this
        if value is None:
            value = {}

        if isinstance(value, ArrayValue):
            value = value.as_value_object()

        # Render attributes
        for name, attribute in self.attributes:
            attr_value = value[name] if name in value else NotSet
            attribute.render(node, attr_value, render_path + [name])

        if (
            len(self.elements_nested) == 1
            and isinstance(value, tuple(self.accepted_types))
            and not isinstance(value, (list, dict, CompoundValue))
        ):
            # Shortcut: a bare value for the single nested element. Build the path
            # from that element's own name rather than reusing a variable left
            # over from the attribute loop, which is unbound when there are no
            # attributes and otherwise names the last attribute.
            name, element = self.elements_nested[0]
            element.type.render(node, value, None, render_path + [name])
            return

        # Render sub elements
        for name, element in self.elements_nested:
            if isinstance(element, Element) or element.accepts_multiple:
                element_value = value[name] if name in value else NotSet
                child_path = render_path + [name]
            else:
                # The nested item receives the whole value and the current path.
                element_value = value
                child_path = list(render_path)

            # We want to explicitly skip this sub-element
            if element_value is SkipValue:
                continue

            if isinstance(element, Element):
                element.type.render(node, element_value, None, child_path)
            else:
                element.render(node, element_value, child_path)

        if xsd_type:
            if xsd_type._xsd_name:
                node.set(xsi_ns("type"), xsd_type._xsd_name)
            if xsd_type.qname:
                node.set(xsi_ns("type"), xsd_type.qname)
Ejemplo n.º 7
0
def put_attr(e: ET._Element, set_val):
    """Set the ``status`` attribute of ``e`` to ``set_val`` unless it already has one."""
    if e.get("status") is not None:
        return
    e.set("status", set_val)
Ejemplo n.º 8
0
def copy_attributes(from_elem: etree._Element, to_elem: etree._Element):
    """Set every attribute of ``from_elem`` on ``to_elem``, overwriting same-named ones."""
    for attribute in from_elem.items():
        attr_name, attr_value = attribute
        to_elem.set(attr_name, attr_value)
Ejemplo n.º 9
0
    def _add_kobo_spans_to_node(
        self, node: etree._Element, name: str
    ) -> etree._Element:
        """Rebuild ``node`` so that its text and its children's tails become spans.

        The node is emptied and refilled: its attributes are restored, its text and
        the tail of each child are handed to ``self._append_kobo_spans_from_text``,
        and each child is processed recursively before being re-attached.
        ``self.paragraph_counter[name]`` is incremented every time that helper
        reports that it added spans.

        Exceptions to the rebuild:

        * ``None``, comments and processing instructions are returned as they are
          (with the tail cleared when the node is not ``None``).
        * Elements whose local tag name is in ``SKIPPED_TAGS`` are returned untouched.
        * Elements whose local tag name is in ``SPECIAL_TAGS`` are wrapped in a new
          XHTML ``span`` and their children are not visited.

        :param node: the node to process; modified in place
        :param name: key into ``self.paragraph_counter``, also used as log prefix
        :return: ``node`` itself, or the new ``span`` wrapping it for special tags
        """
        # process node only if it is not a comment or a processing instruction
        if (
            node is None
            or isinstance(node, etree._Comment)
            or isinstance(node, etree._ProcessingInstruction)
        ):
            if node is not None:
                node.tail = None
            self.log.debug(f"[{name}] Skipping comment/ProcessingInstruction node")
            return node

        # Special case some tags
        # The pattern strips an optional "{namespace}" prefix and captures the
        # local tag name in group 1.
        special_tag_match = re.search(r"^(?:\{[^\}]+\})?(\w+)$", node.tag)
        if special_tag_match:
            # Skipped tags are just flat out skipped
            if special_tag_match.group(1) in SKIPPED_TAGS:
                self.log.debug(f"[{name}] Skipping '{special_tag_match.group(1)}' tag")
                return node

            # Special tags get wrapped in a span and their children are ignored
            if special_tag_match.group(1) in SPECIAL_TAGS:
                self.log.debug(
                    f"[{name}] Wrapping '{special_tag_match.group(1)}' tag and "
                    + "ignoring children"
                )
                # NOTE(review): the span id uses the current counter value, but the
                # counter is not incremented in this branch - confirm that is intended.
                span = etree.Element(
                    f"{{{XHTML_NAMESPACE}}}span",
                    attrib={
                        "id": f"kobo.{self.paragraph_counter[name]}.1",
                        "class": "koboSpan",
                    },
                )
                span.append(node)
                return span

        # save node content for later
        node_text = node.text
        # children are copied before the node is cleared below
        node_children = deepcopy(node.getchildren())
        node_attrs = {}
        for key in list(node.keys()):
            node_attrs[key] = node.get(key)

        # reset current node, to start from scratch
        node.clear()

        # restore node attributes
        for key in node_attrs:
            node.set(key, node_attrs[key])

        # the node text is converted to spans
        if node_text is not None:
            if not self._append_kobo_spans_from_text(node, node_text, name):
                # didn't add spans, restore text
                node.text = node_text
            else:
                self.paragraph_counter[name] += 1

        # re-add the node children
        for child in node_children:
            # save child tail for later
            # (taken off the child before the recursive call and handled on the
            # parent afterwards)
            child_tail = child.tail
            child.tail = None
            node.append(self._add_kobo_spans_to_node(child, name))
            # the child tail is converted to spans
            if child_tail is not None:
                if not self._append_kobo_spans_from_text(node, child_tail, name):
                    # didn't add spans, restore tail on last child
                    node[-1].tail = child_tail
                else:
                    self.paragraph_counter[name] += 1

        return node