def _steam_to_xml(text: str) -> str:
    """Rewrite square-bracket markup in *text* as angle-bracket tags.

    * ``[url=X]Y[/url]`` is rewritten using ``g.xml_url_format``.
    * ``[u]Y[/u]`` is rewritten using ``g.xml_u_format``.
    * ``[img]...[/img]`` spans are removed; one standing alone between two
      newlines collapses to a single newline.
    * Any remaining ``[tag]...[/tag]`` or ``[tag=value]...[/tag]`` pair becomes
      ``<tag>...</tag>`` / ``<tag=value>...</tag>``.
    """
    result = text

    # The templates are filled with regex back-references so that re.sub
    # substitutes the captured url/text.
    url_format = f.format(g.xml_url_format, l={'url': r'\1', 'text': r'\2'})
    result = re.sub(r'\[url=(.*?)](.*?)\[/url]', url_format, result, flags=re.DOTALL | re.IGNORECASE)

    u_format = f.format(g.xml_u_format, l={'text': r'\1'})
    result = re.sub(r'\[u](.*?)\[/u]', u_format, result, flags=re.DOTALL | re.IGNORECASE)

    result = re.sub(r'\n\[img](.*?)\[/img]\n', '\n', result, flags=re.DOTALL | re.IGNORECASE)
    result = re.sub(r'\[img](.*?)\[/img]', '', result, flags=re.DOTALL | re.IGNORECASE)

    # Repeat until nothing is replaced, so tags nested inside already
    # converted tags are converted too.
    count = -1
    while count != 0:
        result, count = re.subn(r'\[(\w+)(=\w+)?](.*?)\[/\1]', r'<\1\2>\3</\1>', result, flags=re.DOTALL)

    return result


def populate_xml(xml_element: _Element, value: Any, *, locals: Optional[dict] = None) -> None:
    """Add the entries of a dict *value* to *xml_element* as children or attributes.

    Each key is passed through ``f.format(key, l=locals)`` and used as the tag
    of a new child built with ``E``; the child is populated recursively from
    the entry's value.  Entries whose value is ``None`` are skipped.  A key
    starting with ``'_'`` whose child ended up with no sub-elements is stored
    as an attribute (without the underscore) instead of being appended.

    Values that are not dicts are left untouched by the code visible here.

    :param xml_element: element that receives the children/attributes
    :param value: data to serialise
    :param locals: mapping forwarded to ``f.format`` as ``l``
    """
    if not isinstance(value, dict):
        return

    for raw_key, v in value.items():
        if v is None:  # optional
            continue

        key = f.format(raw_key, l=locals)
        xml_child = E(key)
        populate_xml(xml_child, v, locals=locals)

        if isinstance(v, Steam2Xml):
            # noinspection PyTypeChecker
            xml_child.text = etree.CDATA(_steam_to_xml(xml_child.text))

        # len() counts sub-elements; a leaf whose key starts with '_' is an attribute.
        is_attribute = key.startswith('_') and len(xml_child) == 0
        if is_attribute:
            xml_element.set(key[1:], f.format(str(v), l=locals))
        else:
            xml_element.append(xml_child)
def render(
    self,
    node: etree._Element,
    value: typing.Union[list, dict, CompoundValue],
    xsd_type: "ComplexType" = None,
    render_path=None,
) -> None:
    """Write *value* onto *node* as text, or flag the node as nil.

    When *value* is the ``Nil`` sentinel the attribute named by
    ``xsi_ns("nil")`` is set to ``"true"`` and no text is written; any other
    value is converted with ``self.xmlvalue`` and stored as the node text.

    :param node: element to write to
    :param value: value to serialise
    :param xsd_type: must be ``None`` (asserted)
    :param render_path: accepted but not used here
    """
    assert xsd_type is None

    if value is not Nil:
        node.text = self.xmlvalue(value)
    else:
        node.set(xsi_ns("nil"), "true")
def wrap_tei_node_in_ancestors(self, node: etree._Element, wrapped_node: etree._Element):
    """Wraps a tei node in copies of its (non-technical) ancestor nodes.

    Every ancestor of ``node`` except ``tei:TEI`` and
    ``tei:text[@type = "work_part"]`` is re-created as a fresh element (local
    name and attributes only) around ``wrapped_node``, innermost ancestor first.
    The outermost resulting element gets an ``xmlns`` attribute holding the tei
    namespace.

    @param node: the node in the original tree, required for navigating the tree towards the top
    @param wrapped_node: a copy of the original node or its wrapping, which will eventually be returned
    @return: the outermost wrapping element, or ``wrapped_node`` itself if there was nothing to wrap
    """
    # One query is enough: the matching ancestors of any ancestor are a prefix
    # of this list, so there is no need to query again at every level.
    ancestors = node.xpath(
        'ancestor::*[not(self::tei:TEI or self::tei:text[@type = "work_part"])]',
        namespaces=xml_ns,
    )

    # ancestors are in document order, so walk them from the nearest one outwards
    for ancestor in reversed(ancestors):
        wrap = etree.Element(etree.QName(ancestor).localname)
        copy_attributes(ancestor, wrap)
        wrap.append(wrapped_node)
        wrapped_node = wrap

    # declare the tei namespace on the root element
    wrapped_node.set('xmlns', xml_ns['tei'])
    return wrapped_node
def parse(
    node: etree._Element,
    all_tokens: list,
    n_prev_tokens=0,
):
    """Tokenize the text below *node* and record token offsets on every element.

    Text nodes and child elements of *node* are visited in the order returned
    by the XPath ``./text()|*``.  Text is tokenized with ``annotator.tokenize``
    and the flattened tokens are appended to *all_tokens*; child elements are
    processed recursively.  Afterwards *node* receives ``start_pos`` and
    ``end_pos`` attributes holding the token offsets it spans.

    :param node: element to process (modified in place)
    :param all_tokens: list that collects every token (modified in place)
    :param n_prev_tokens: number of tokens that precede *node*
    :return: number of tokens found inside *node*
    """
    n_tokens = 0
    for e in node.xpath("./text()|*"):
        if isinstance(e, etree._ElementUnicodeResult):
            # NOTE(review): as written this substitution replaces '&' with '&'
            # and so leaves the text unchanged -- confirm what was intended.
            text = re.sub('&', '&', str(e))
            # Flatten the tokenizer's nested result in one linear pass
            # (``sum(lists, [])`` copies the accumulated list at every step).
            tokens = [token for group in annotator.tokenize(text) for token in group]
            all_tokens.extend(tokens)
            n_tokens += len(tokens)
        else:
            n_tokens += parse(e, all_tokens=all_tokens, n_prev_tokens=n_prev_tokens + n_tokens)

    node.set('start_pos', str(n_prev_tokens))
    node.set('end_pos', str(n_prev_tokens + n_tokens))
    return n_tokens
def render(
    self,
    node: etree._Element,
    value: typing.Union[list, dict, CompoundValue],
    xsd_type: "ComplexType" = None,
    render_path=None,
) -> None:
    """Render *value* onto *node*, dispatching on the kind of value.

    * ``AnyObject`` without an ``xsd_type``: the node is flagged with the
      ``xsi_ns("nil")`` attribute.
    * ``AnyObject`` with an ``xsd_type``: the wrapped value is rendered by that
      type and the node is tagged with the type's qname.
    * ``CompoundValue``: rendered by its ``_xsd_elm`` and tagged with that
      element's qname.
    * anything else: stored as text via ``self.xmlvalue``.

    :param node: element to write to
    :param value: value to serialise
    :param xsd_type: must be ``None`` (asserted)
    :param render_path: forwarded to the delegated ``render`` calls
    """
    assert xsd_type is None

    if isinstance(value, AnyObject):
        wrapped_type = value.xsd_type
        if wrapped_type is None:
            node.set(xsi_ns("nil"), "true")
            return
        wrapped_type.render(node, value.value, None, render_path)
        node.set(xsi_ns("type"), wrapped_type.qname)
        return

    if isinstance(value, CompoundValue):
        xsd_elm = value._xsd_elm
        xsd_elm.render(node, value, render_path)
        node.set(xsi_ns("type"), xsd_elm.qname)
        return

    node.text = self.xmlvalue(value)
def render(
    self,
    node: etree._Element,
    value: typing.Union[list, dict, CompoundValue],
    xsd_type: "ComplexType" = None,
    render_path=None,
) -> None:
    """Serialize the given value lxml.Element subelements on the node element.

    Attributes are rendered first, then the nested elements.  If there is
    exactly one nested element and ``value`` is a plain accepted value (not a
    list, dict or CompoundValue), the value is rendered directly by that
    element's type.  Finally, when ``xsd_type`` is given, an ``xsi:type``
    attribute is written on the node.

    :param node: element to render onto
    :param value: value to serialise; ``None`` is treated as an empty dict
    :param xsd_type: optional type used for the ``xsi:type`` attribute
    :param render_path: list of names leading to this node; defaults to
        ``[self.name]``

    """
    if not render_path:
        render_path = [self.name]

    if not self.elements_nested and not self.attributes:
        return

    # TODO: Implement test case for this
    if value is None:
        value = {}

    if isinstance(value, ArrayValue):
        value = value.as_value_object()

    # Render attributes
    for name, attribute in self.attributes:
        attr_value = value[name] if name in value else NotSet
        child_path = render_path + [name]
        attribute.render(node, attr_value, child_path)

    if (
        len(self.elements_nested) == 1
        and isinstance(value, tuple(self.accepted_types))
        and not isinstance(value, (list, dict, CompoundValue))
    ):
        # Build the path from the nested element's own name.  Reusing the
        # variable from the attribute loop would pass the last attribute's
        # path, or fail with UnboundLocalError when there are no attributes.
        name, element = self.elements_nested[0]
        element.type.render(node, value, None, render_path + [name])
        return

    # Render sub elements
    for name, element in self.elements_nested:
        if isinstance(element, Element) or element.accepts_multiple:
            element_value = value[name] if name in value else NotSet
            child_path = render_path + [name]
        else:
            element_value = value
            child_path = list(render_path)

        # We want to explicitly skip this sub-element
        if element_value is SkipValue:
            continue

        if isinstance(element, Element):
            element.type.render(node, element_value, None, child_path)
        else:
            element.render(node, element_value, child_path)

    if xsd_type:
        # Both branches write the same attribute; when both names are set the
        # qname is written last and therefore wins.
        if xsd_type._xsd_name:
            node.set(xsi_ns("type"), xsd_type._xsd_name)
        if xsd_type.qname:
            node.set(xsi_ns("type"), xsd_type.qname)
def put_attr(e: ET._Element, set_val):
    """Set the ``status`` attribute of *e* to *set_val* unless it already has one.

    An existing ``status`` value (including an empty string) is left untouched.
    """
    existing = e.get("status")
    if existing is not None:
        return
    e.set("status", set_val)
def copy_attributes(from_elem: etree._Element, to_elem: etree._Element):
    """Copy every attribute of *from_elem* onto *to_elem*.

    Attributes already present on *to_elem* are overwritten when the source has
    an attribute of the same name and kept otherwise.  *from_elem* is not
    modified.
    """
    for attr_name in from_elem.keys():
        to_elem.set(attr_name, from_elem.get(attr_name))
def _add_kobo_spans_to_node(
    self, node: etree._Element, name: str
) -> etree._Element:
    """Rebuild *node* so that its text and its children's tails become spans.

    ``None``, comments and processing instructions are returned as they are
    (comments/PIs with their tail removed).  Elements whose local tag name is
    in ``SKIPPED_TAGS`` are returned untouched; those in ``SPECIAL_TAGS`` are
    wrapped in a single ``koboSpan`` span without looking at their children.

    Every other element is cleared and rebuilt: attributes are restored, the
    text and each child's tail are handed to
    ``self._append_kobo_spans_from_text`` (falling back to the plain text when
    no spans were added), and children are processed recursively.
    ``self.paragraph_counter[name]`` is incremented each time spans were added.

    :param node: element to process (modified in place)
    :param name: key used for log messages and for ``self.paragraph_counter``
    :return: the processed node, or the wrapping span for special tags
    """
    # process node only if it is not a comment or a processing instruction
    is_missing = node is None
    if is_missing or isinstance(
        node, (etree._Comment, etree._ProcessingInstruction)
    ):
        if not is_missing:
            node.tail = None
        self.log.debug(f"[{name}] Skipping comment/ProcessingInstruction node")
        return node

    # Special case some tags; the pattern strips an optional "{namespace}" prefix
    tag_match = re.search(r"^(?:\{[^\}]+\})?(\w+)$", node.tag)
    if tag_match:
        local_tag = tag_match.group(1)

        # Skipped tags are just flat out skipped
        if local_tag in SKIPPED_TAGS:
            self.log.debug(f"[{name}] Skipping '{local_tag}' tag")
            return node

        # Special tags get wrapped in a span and their children are ignored
        if local_tag in SPECIAL_TAGS:
            self.log.debug(
                f"[{name}] Wrapping '{local_tag}' tag and ignoring children"
            )
            span_tag = f"{{{XHTML_NAMESPACE}}}span"
            span_attrs = {
                "id": f"kobo.{self.paragraph_counter[name]}.1",
                "class": "koboSpan",
            }
            span = etree.Element(span_tag, attrib=span_attrs)
            span.append(node)
            return span

    # save node content for later
    saved_text = node.text
    saved_children = deepcopy(node.getchildren())
    saved_attrs = {key: node.get(key) for key in node.keys()}

    # reset current node, to start from scratch
    node.clear()

    # restore node attributes
    for key, attr_value in saved_attrs.items():
        node.set(key, attr_value)

    # the node text is converted to spans
    if saved_text is not None:
        if self._append_kobo_spans_from_text(node, saved_text, name):
            self.paragraph_counter[name] += 1
        else:
            # didn't add spans, restore text
            node.text = saved_text

    # re-add the node children
    for child in saved_children:
        # save child tail for later
        tail_text = child.tail
        child.tail = None
        node.append(self._add_kobo_spans_to_node(child, name))

        # the child tail is converted to spans
        if tail_text is None:
            continue
        if self._append_kobo_spans_from_text(node, tail_text, name):
            self.paragraph_counter[name] += 1
        else:
            # didn't add spans, restore tail on last child
            node[-1].tail = tail_text

    return node