def populate_xml(xml_element: _Element, value: Any, *, locals: Optional[dict] = None) -> None:
    """Fill ``xml_element`` from the mapping ``value``.

    Only ``dict`` values are processed; any other ``value`` leaves
    ``xml_element`` untouched. For every ``key: item`` pair of the mapping:

    * ``None`` items are skipped (they mark optional entries).
    * The key is passed through ``f.format`` with ``locals`` and used as the
      tag of a new child, which is then populated recursively from the item.
    * For ``Steam2Xml`` items the child's text is converted from BBCode-style
      markup to XML-style markup and wrapped in a CDATA section.
    * A formatted key starting with ``_`` whose child has no sub-elements is
      stored as an attribute of ``xml_element`` (name without the leading
      underscore, value formatted with ``locals``); everything else is
      appended as a child element.
    """
    if not isinstance(value, dict):
        return

    bbcode_flags = re.DOTALL | re.IGNORECASE

    def steam_to_xml(text: str) -> str:
        """Rewrite BBCode-style tags in ``text`` as XML-style tags."""
        url_format = f.format(g.xml_url_format, l={'url': r'\1', 'text': r'\2'})
        converted = re.sub(r'\[url=(.*?)](.*?)\[/url]', url_format, text, flags=bbcode_flags)
        u_format = f.format(g.xml_u_format, l={'text': r'\1'})
        converted = re.sub(r'\[u](.*?)\[/u]', u_format, converted, flags=bbcode_flags)
        # Images are removed; one standing alone on its line takes one of the
        # surrounding line breaks with it.
        converted = re.sub(r'\n\[img](.*?)\[/img]\n', '\n', converted, flags=bbcode_flags)
        converted = re.sub(r'\[img](.*?)\[/img]', '', converted, flags=bbcode_flags)
        # Keep substituting until a pass changes nothing, so that tags nested
        # inside other tags are converted as well.
        while True:
            converted, replaced = re.subn(
                r'\[(\w+)(=\w+)?](.*?)\[/\1]', r'<\1\2>\3</\1>', converted, flags=re.DOTALL)
            if not replaced:
                return converted

    for raw_key, item in value.items():
        if item is None:  # optional
            continue
        tag = f.format(raw_key, l=locals)
        xml_child = E(tag)
        populate_xml(xml_child, item, locals=locals)
        if isinstance(item, Steam2Xml):
            # noinspection PyTypeChecker
            xml_child.text = etree.CDATA(steam_to_xml(xml_child.text))
        if tag.startswith('_') and len(xml_child) == 0:
            xml_element.set(tag[1:], f.format(str(item), l=locals))
        else:
            xml_element.append(xml_child)
def add_obj_ref(
    tag_element: _Element,
    obj_ref_el_list: Iterable[_Element],
    adjacent_element: Optional[_Element],
    put_after_adjacent: bool = False,
) -> None:
    """
    Place obj_ref elements into the given tag element, adding or moving them.

    tag_element -- the tag element that receives the obj_ref elements
    obj_ref_el_list -- the elements to add to or move within the tag
    adjacent_element -- the element the obj_ref elements are placed next to;
        when None, the elements are appended to tag_element
    put_after_adjacent -- place the elements after (True) or before (False)
        the adjacent element
    """
    if adjacent_element is None:
        # No anchor given: the elements simply go to the end of the tag.
        for obj_ref in obj_ref_el_list:
            tag_element.append(obj_ref)
        return

    move_elements(
        obj_ref_el_list,
        adjacent_element,
        put_after_adjacent=put_after_adjacent,
    )
def add_iso_md_element(xml_obj: _Element, new_link: str):
    """ Adds a new MetadataURL element to the parent xml_obj

    The new element holds a ``Format`` child ("text/xml") and an
    ``OnlineResource`` child whose xlink:href is the given link. It is placed
    directly behind the last MetadataURL child already present; without any
    such child it is appended at the end of xml_obj.

    Args:
        xml_obj (_Element): The parent xml object which holds all MetadataURL elements
        new_link (str): The link of the new metadata resource
    Returns:
        nothing
    """
    iso_elem = etree.Element(
        "MetadataURL",
        {"type": "ISO19115:2003"},
        nsmap={"xlink": "http://www.w3.org/1999/xlink"},
    )
    etree.SubElement(iso_elem, "Format").text = "text/xml"
    etree.SubElement(
        iso_elem,
        "OnlineResource",
        {
            "{http://www.w3.org/1999/xlink}type": "simple",
            "{http://www.w3.org/1999/xlink}href": new_link
        },
    )

    existing_md_elements = try_get_element_from_xml("./MetadataURL", xml_obj)
    if not len(existing_md_elements):
        # No other MetadataURL elements -> just append at the end
        xml_obj.append(iso_elem)
        return

    # Insert right behind the last MetadataURL element that already exists
    last_position = max(xml_obj.index(existing) for existing in existing_md_elements)
    xml_obj.insert(last_position + 1, iso_elem)
def move_field_from_element(
    original_parent: etree._Element,
    to_remove: str,
    replacement_parent: etree._Element = None,
) -> None:
    """
    An XML utility function to move or remove a field from an element.

    If replacement_parent is None, this performs a removal. Otherwise, the
    matching elements are detached from their current parent and appended to
    replacement_parent.

    Args:
        original_parent (etree._Element): The initial parent of the field to
            be moved/removed.
        to_remove (str): The name of the field to be moved/removed (passed to
            ``findall`` on original_parent). None means to move
            original_parent itself. None can only be used to move and not
            remove.
        replacement_parent (etree._Element, optional): The new parent for the
            field to be moved. Defaults to None.

    Raises:
        ValueError: If both to_remove and replacement_parent are None, since
            the root can only be moved, not removed.
    """
    if to_remove is None:
        # We want to move the original parent itself.
        if replacement_parent is None:
            # Previously this case fell through to ``None.append(...)`` and
            # failed with an opaque AttributeError.
            raise ValueError(
                "to_remove=None moves the original parent itself and "
                "therefore requires a replacement_parent"
            )
        replacement_parent.append(original_parent)
        return

    elements_to_move = original_parent.findall(to_remove)
    for elem in elements_to_move:
        elem.getparent().remove(elem)

    if replacement_parent is not None:
        # Re-attach the detached elements below their new parent.
        for elem in elements_to_move:
            replacement_parent.append(elem)
def _add_default_asserts_to_output(self, output: etree._Element) -> None: """Given a XML element (usually an output), this method adds the <assert_contents> tag with some examples of asserts. Args: output (etree._Element): The output XML element of the <test> that will contain the <assert_contents>. """ assert_contents = etree.Element(ASSERT_CONTENTS) has_text = etree.Element(HAS_TEXT) has_text.attrib[TEXT] = self._get_next_tabstop() assert_contents.append(has_text) has_line = etree.Element(HAS_LINE) has_line.attrib[LINE] = self._get_next_tabstop() assert_contents.append(has_line) has_line_matching = etree.Element(HAS_LINE_MATCHING) has_line_matching.attrib[EXPRESSION] = self._get_next_tabstop() assert_contents.append(has_line_matching) has_n_columns = etree.Element(HAS_N_COLUMNS) has_n_columns.attrib[N] = self._get_next_tabstop() assert_contents.append(has_n_columns) has_size = etree.Element(HAS_SIZE) has_size.attrib[VALUE] = self._get_next_tabstop() has_size.attrib[DELTA] = self._get_next_tabstop() assert_contents.append(has_size) output.append(assert_contents)
def _append_kobo_spans_from_text( self, node: etree._Element, text: str, name: str ) -> etree._Element: if not text: self.log.error(f"[{name}] No text passed, can't add spans") return False # if text is only whitespace, don't add spans if text.strip() == "": self.log.warning(f"[{name}] Found only whitespace, not adding spans") return False # split text in sentences groups = TEXT_SPLIT_RE.split(text) # remove empty strings resulting from split() groups = [g for g in groups if g != ""] # TODO: To match Kobo KePubs, the trailing whitespace needs to # be prepended to the next group. Probably equivalent to make # sure the space stays in the span at the end. # add each sentence in its own span segment_counter = 1 for g in groups: span = etree.Element( f"{{{XHTML_NAMESPACE}}}span", attrib={ "class": "koboSpan", "id": f"kobo.{self.paragraph_counter[name]}.{segment_counter}", }, ) span.text = g node.append(span) segment_counter += 1 return True
def add_data_requests(parent: _Element, participant: str) -> None:
    """Append a front-facing ``camera`` element for ``participant`` to ``parent``.

    The camera id is ``"egoFrontCamera_"`` followed by the participant name;
    width, height, direction and field of view are fixed values.
    """
    camera_attributes = {
        "id": "egoFrontCamera_" + participant,
        "width": "160",
        "height": "120",
        "direction": "FRONT",
        "fov": "60",
    }
    camera_node = Element("camera")
    for attribute_name, attribute_value in camera_attributes.items():
        camera_node.set(attribute_name, attribute_value)
    parent.append(camera_node)
def flatten_xml(xml: etree._Element):
    """Replace every ``/pdf2xml/page`` element by its children.

    The children of all matched pages are collected in document order, each
    page is removed from ``xml``, and the collected children are then
    appended directly to ``xml``.
    """
    collected = []
    for page in xml.xpath('/pdf2xml/page'):
        # Grab the children before the page itself is dropped.
        collected.extend(page)
        xml.remove(page)

    for element in collected:
        xml.append(element)
def set_thumbnail_item(self, item: etree._Element, img_url: str):
    """Point the item's ``enclosure`` element at ``img_url``.

    The first ``enclosure`` found anywhere below ``item`` is reused; if there
    is none, a new one is appended to ``item``. Its ``url`` is set to
    ``img_url`` and its ``type`` to ``image/jpeg``.
    """
    found = item.xpath(".//enclosure")
    enclosure: etree._Element
    if len(found) == 0:
        enclosure = etree.Element("enclosure")
        item.append(enclosure)
    else:
        enclosure = found[0]

    enclosure.attrib["url"] = img_url
    enclosure.attrib["type"] = "image/jpeg"
def move_resources_to_group(
    group_element: _Element,
    primitives_to_place: Iterable[_Element],
    adjacent_resource: Optional[_Element] = None,
    put_after_adjacent: bool = True,
) -> None:
    """
    Put resources into a group or move them within their group

    There is a corner case which is not covered in this function. If the CIB
    contains references to a group or clone which this function deletes, they
    are not deleted and an invalid CIB is generated. These references can be
    constraints, fencing levels etc. - anything that contains group id of the
    deleted group. It is on the caller to detect this corner case and handle
    it appropriately (see group_add in lib/commands/resource.py). For future
    rewrites of this function, it would be better to ask for --force before
    deleting anything that user didn't explicitly ask for - like deleting the
    clone and its associated constraints.

    etree.Element group_element -- the group to put resources into
    iterable primitives_to_place -- resource elements to put into the group
    etree.Element adjacent_resource -- put resources beside this one if set
    bool put_after_adjacent -- put resources after or before the adjacent one
    """
    for resource in primitives_to_place:
        # Remember the original parent so it can be cleaned up below if the
        # move leaves it empty.
        old_parent = resource.getparent()

        # Move a resource to the group.
        if (adjacent_resource is not None
                and adjacent_resource.getnext() is not None
                and put_after_adjacent):
            # "After adjacent" is done by inserting before adjacent's next
            # sibling; the moved resource then becomes the new anchor so that
            # following resources keep the order of primitives_to_place.
            adjacent_resource.getnext().addprevious(resource)  # type: ignore
            adjacent_resource = resource
        elif adjacent_resource is not None and not put_after_adjacent:
            adjacent_resource.addprevious(resource)
        else:
            # Either no adjacent resource was given, or the adjacent resource
            # has no next sibling and the resource goes after it: append to
            # the group and use the appended resource as the new anchor.
            group_element.append(resource)
            adjacent_resource = resource

        # If the resource was the last resource in another group, that group is
        # now empty and must be deleted. If the group is in a clone element,
        # delete that as well.
        if (old_parent is not None and group.is_group(
                old_parent)  # do not delete resources element
                and not group.get_inner_resources(old_parent)):
            old_grandparent = old_parent.getparent()
            if old_grandparent is not None:
                old_great_grandparent = old_grandparent.getparent()
                if (clone.is_any_clone(old_grandparent)
                        and old_great_grandparent is not None):
                    # Removing the clone removes the empty group inside it.
                    old_great_grandparent.remove(old_grandparent)
                else:
                    old_grandparent.remove(old_parent)
def set_thumbnail_item(self, item: etree._Element, img_url: str):
    """Point the item's ``thumbnail`` element at ``img_url``.

    The first element below ``item`` whose local name is ``thumbnail`` and
    which carries a ``url`` attribute is reused; if there is none, a new
    ``thumbnail`` element in the ``media`` namespace is appended to ``item``.
    Its ``url`` attribute is then set to ``img_url``.
    """
    found = item.xpath(
        ".//*[local-name()='thumbnail'][@url]", namespaces=NAMESPACES)
    media: etree._Element
    if len(found) == 0:
        media = etree.Element("{%s}thumbnail" % NAMESPACES["media"], nsmap=NAMESPACES)
        item.append(media)
    else:
        media = found[0]

    media.attrib["url"] = img_url
def add_data_requests(ai_tag: _Element, participant_id: str) -> None:
    """Append a ``camera`` and a ``speed`` data request element to ``ai_tag``.

    All attribute values are fixed; ``participant_id`` is accepted but not
    used by this implementation.
    """
    from lxml.etree import Element

    # (tag, attributes) in the order the elements are appended.
    data_requests = (
        ("camera", (
            ("id", "center_cam"),
            ("direction", "FRONT"),
            ("width", "320"),
            ("height", "160"),
            ("fov", "120"),
        )),
        ("speed", (
            ("id", "ego_speed"),
        )),
    )
    for tag, attributes in data_requests:
        request_node = Element(tag)
        for attribute_name, attribute_value in attributes:
            request_node.set(attribute_name, attribute_value)
        ai_tag.append(request_node)
def _add_output_to_test(self, data: XmlElement, test_element: etree._Element) -> None: """Converts the given 'data' (<data>) XML element in an output XML element and adds it to the given <test> element. Args: output (XmlElement): The test_element (etree._Element): [description] """ name = data.get_attribute(NAME) if name: output_element = etree.Element(OUTPUT) output_element.attrib[NAME] = name self._add_default_asserts_to_output(output_element) test_element.append(output_element)
def get_settings(root: _Element = None) -> _Element:
    """Add the XML elements of every entry in ``g.settings`` to ``root``.

    For each setting, ``g.SettingLanguageData`` is expanded via
    ``populate_xml`` (with the setting as ``locals``) into a temporary
    container, whose children are then moved to ``root``. When ``root`` is
    None a new ``LanguageData`` element is created. Returns ``root``.

    Every setting must have a truthy ``'name'`` and contain a ``'title'`` key
    (which may be empty); this is checked with assertions.
    """
    target = E.LanguageData() if root is None else root

    for setting in g.settings:
        setting: dict
        assert setting['name'], setting
        assert 'title' in setting, setting  # permit ''
        # setting['title'] = re.sub(r'\b\.$', '', setting['title'])

        staging = E('temp')
        populate_xml(staging, g.SettingLanguageData, locals=setting)
        for generated in staging:
            target.append(generated)

    return target
def _post_process_tweets(self, dom: etree._Element):
    """ Process tweets, to replace twitter url by tweets' content

    For every link to twitter.com whose href matches ``TWEETS_REGEX``, the
    link is removed from its parent, the parent gets the id
    ``parent-<tweet id>``, and a ``div`` with id ``tweet_<tweet id>`` plus a
    ``script`` that renders the tweet into that div are inserted right after
    the parent. If at least one tweet was found, the Twitter widgets script
    is appended to ``dom``.
    """
    has_tweets: bool = False
    for a in xpath(dom, "//a[contains(@href,'https://twitter.com/')]|//a[contains(@href,'//twitter.com/')]"):
        m = re.match(TWEETS_REGEX, a.attrib["href"])
        if m is None:
            continue

        tweet_id: str = m.group(1)
        has_tweets = True
        theme = "dark" if "dark" in self.parameters and self.parameters[
            "dark"] == "true" else "light"

        script = etree.Element("script")
        # The template must keep its line breaks: the "// or all" line
        # comment would otherwise swallow the remainder of the script.
        script.text = """
window.addEventListener("DOMContentLoaded", function() {
    var tweet_%s = document.getElementById("tweet_%s");
    twttr.widgets.createTweet(
        '%s', tweet_%s,
        {
            conversation : 'none',    // or all
            cards        : 'visible',
            theme        : '%s'
        });
});
document.getElementById("parent-%s").style.display = "none";
""" % (
            tweet_id, tweet_id, tweet_id, tweet_id,
            theme,
            tweet_id
        )

        tweet_div = etree.Element("div")
        tweet_div.set("id", "tweet_%s" % tweet_id)

        link_parent = a.getparent()
        # addnext() inserts directly behind the parent, so adding the script
        # first leaves the order: parent, tweet div, script.
        link_parent.addnext(script)
        link_parent.addnext(tweet_div)
        link_parent.set("id", "parent-%s" % tweet_id)
        link_parent.remove(a)

    if has_tweets:
        script = etree.Element("script")
        script.set("src", "https://platform.twitter.com/widgets.js")
        script.set("sync", "")
        dom.append(script)
def create_subelement(xml_elem: _Element, tag_name, after: str = None, attrib: dict = None, nsmap: dict = None):
    """ Creates a new xml element as a child of xml_elem with the name tag_name

    Args:
        xml_elem: The xml element
        tag_name: The tag name for the new element
        after (str): The tag name of the element after which the new one should be inserted.
            If omitted, or if no such child can be found, the new element is appended at the end.
        attrib: The attribute dict for the new element
        nsmap: The namespace map for the new element (defaults to an empty map)
    Returns:
         A new subelement of xml_elem
    """
    # A fresh dict per call instead of a shared mutable default argument.
    ret_element = etree.Element(tag_name, attrib=attrib, nsmap={} if nsmap is None else nsmap)

    after_element = None
    if after is not None:
        after_element = try_get_single_element_from_xml("./{}".format(after), xml_elem)

    if after_element is None:
        # No anchor requested, or the anchor could not be found -> append at the end
        xml_elem.append(ret_element)
    else:
        xml_elem.insert(xml_elem.index(after_element) + 1, ret_element)
    return ret_element
def _add_output_collection_to_test(self, output_collection: XmlElement, test_element: etree._Element) -> None: """Adds the 'output_collection' XML element to the 'test_element' with a default <element>. Args: output_collection (XmlElement): The <collection> XML element. test_element (etree._Element): The <test> XML element. """ name = output_collection.get_attribute(NAME) if name: output_element = etree.Element(OUTPUT_COLLECTION) output_element.attrib[NAME] = name type_attr = output_collection.get_attribute(TYPE) if type_attr: output_element.attrib[TYPE] = type_attr element = etree.Element(ELEMENT) element.attrib[NAME] = self._get_next_tabstop() self._add_default_asserts_to_output(element) output_element.append(element) test_element.append(output_element)
def add_subelement(parent_elem: _Element, sub_element: _Element, after: str = None):
    """ Adds an existing xml element as a child of parent_elem

    Args:
        parent_elem: The parent xml element
        sub_element: The sub xml element
        after (str): The tag name of the child after which sub_element should be inserted.
            If omitted, or if no such child can be found, sub_element is appended at the end.
    Returns:
        parent_elem: The modified xml element, holding the subelement as a child
    """
    after_element = None
    if after is not None:
        after_element = try_get_single_element_from_xml("./{}".format(after), parent_elem)

    if after_element is None:
        # Append explicitly: insert(-1, ...) would place the element *before*
        # the current last child instead of at the end.
        parent_elem.append(sub_element)
    else:
        parent_elem.insert(parent_elem.index(after_element) + 1, sub_element)
    return parent_elem
def add_italics(parent_element: etree._Element, title: str) -> None:
    """Appends the title to the parent_element and inserts emph elements if necessary.

    Text enclosed in a pair of underscores (``_like this_``) is placed in an
    ``<emph render="italic">`` child; the text before the first underscore
    becomes the parent's text and the text following each closing underscore
    becomes the tail of the preceding ``emph`` element. A title without
    underscores is stored as plain text.

    Raises:
        ValueError: If the title contains an odd number of underscores.
    """
    if "_" not in title:
        parent_element.text = title
        return

    # Parity (not floor division) decides whether the markers are balanced.
    if title.count("_") % 2:
        raise ValueError(
            f"Unbalanced amount of italics indicators '_' in {title}")

    title_split = title.split("_")
    parent_element.text = title_split[0]

    # With balanced markers the parts alternate plain/italic/plain/...:
    # odd positions are italic, the even position after each is its tail.
    for italic_text, following_text in zip(title_split[1::2], title_split[2::2]):
        emph = etree.Element("emph", {"render": "italic"})
        emph.text = italic_text
        emph.tail = following_text
        parent_element.append(emph)
def merge_nodes(src: etree._Element, dst: etree._Element):
    """
    Merges the children of ``src``, including their subelements, into ``dst``.

    Children are considered equal - and thus merged recursively - if their
    fully qualified names are identical; for each child of ``dst`` the first
    child of ``src`` with the same name is used. Children of ``src`` that were
    not merged this way are deep-copied and appended to ``dst``.
    Different matching and merging strategies will be added as needed.
    """

    def child_with_qname(element: etree._Element, qname: etree.QName):
        # First child whose fully qualified name equals the requested one,
        # or None when there is no such child.
        wanted = qname.text
        return next(
            (
                candidate
                for candidate in element.iterchildren(wanted)
                if etree.QName(candidate).text == wanted
            ),
            None,
        )

    merged_elements = set()

    for dst_child in dst.iterchildren():
        twin = child_with_qname(src, etree.QName(dst_child))
        if twin is None:
            continue
        merge_nodes(twin, dst_child)
        merged_elements.add(twin)

    for src_child in src.iterchildren():
        if src_child not in merged_elements:
            dst.append(deepcopy(src_child))
def _add_thumbnail_in_description(self, item: etree._Element, description: etree._Element, parameters: Dict[str, str], thumbnail_url: str) -> str:
    """Make sure the description shows a thumbnail and return the thumbnail url.

    When the description has no text, nothing is changed and
    ``thumbnail_url`` is returned as is. Otherwise:

    * if the description already yields a thumbnail url (see
      ``_get_thumbnail_url_from_description``), that url is returned;
    * if not, an ``img`` element is appended to the description. Its source
      is ``thumbnail_url``, or - when that is empty - a ``/thumbnails``
      request url built from the item title.

    When the ``nsfw`` parameter is ``"true"`` the description images are
    additionally passed to ``_manage_blur_image_link``.

    Args:
        item: The feed item the description belongs to.
        description: The description element of the item.
        parameters: Request parameters; ``nsfw`` and ``translateto`` are read.
        thumbnail_url: A thumbnail url already known for the item, or "".

    Returns:
        The url of the thumbnail in use.
    """
    img_url: str = thumbnail_url

    # "nsfw" defaults to "false" when the parameter is absent
    nsfw: str = "false" if "nsfw" not in parameters else parameters["nsfw"]
    if description.text is not None:
        description_thumbnail_url: str = self._get_thumbnail_url_from_description(
            description)
        if description_thumbnail_url == "":
            # if description does not have a picture, add one from enclosure or media:content tag if any
            title_node: etree._Element = cast(
                etree._Element, self.get_title(item))
            if "translateto" in parameters:
                translate_dom(title_node, parameters["translateto"])
            if img_url == "":
                # uses the ThumbnailHandler to fetch an image from google search images
                # (the request is the title text with its <title> tags stripped)
                img_url = "%s/thumbnails?request=%s&blur=%s" % (
                    self.serving_url_prefix,
                    quote_plus(
                        re.sub(r"</?title[^>]*>", "", to_string(title_node)).strip()),
                    nsfw)

            img = etree.Element("img")
            img.set("src", img_url)
            description.append(img)
        else:
            img_url = description_thumbnail_url

        # blur description images
        if nsfw == "true":
            self._manage_blur_image_link(item, description)

    return img_url
def _build_test_tree(self, input: InputNode, parent: etree._Element) -> None:
    """Append test elements for all nodes of the given 'input' to the
    'parent' XML element.

    Elements are added in three passes: first one element per param, then
    the elements built for each repeat, and finally one element per section.

    Args:
        input (InputNode): The InputNode to extract the node information.
        parent (etree._Element): The XML element that will contain the
        elements of the generated tree.
    """
    for param_node in input.params:
        parent.append(self._build_param_test_element(param_node))

    for repeat_node in input.repeats:
        for repeat_element in self._build_min_repeat_test_elements(repeat_node):
            parent.append(repeat_element)

    for section_node in input.sections:
        parent.append(self._build_section_test_element(section_node))
def translate_dom(dom: etree._Element, dest_language: str, original_url: Optional[str] = None):
    """Translate the text and tail of every node of ``dom`` in place.

    Nothing happens unless ``dest_language`` is one of ``LANGUAGES``. When
    ``original_url`` is given, an "Untranslated" link - the original url with
    its ``translateto`` parameter renamed to ``nop`` - is appended to ``dom``
    between two ``hr`` elements.
    """
    if dest_language not in LANGUAGES:
        return

    translator = Translator()
    for node in dom.iter():
        node.text = _translate(node.text, translator, dest_language)
        node.tail = _translate(node.tail, translator, dest_language)

    if original_url is None:
        return

    untranslated_href = original_url.replace("&translateto=", "&nop=")
    untranslated_href = untranslated_href.replace("?translateto=", "?nop=")  # quick & dirty

    untranslated_link = etree.Element("a")
    untranslated_link.text = "Untranslated"
    untranslated_link.attrib["href"] = untranslated_href

    dom.append(etree.Element("hr"))
    dom.append(untranslated_link)
    dom.append(etree.Element("hr"))
def _add_kobo_spans_to_node(
    self, node: etree._Element, name: str
) -> etree._Element:
    """Recursively wrap the text below ``node`` in koboSpan elements.

    The node is rebuilt in place: its text and the tails of its children are
    converted to spans via ``_append_kobo_spans_from_text`` and its children
    are processed recursively. ``self.paragraph_counter[name]`` is increased
    each time a piece of text was converted to spans.

    Args:
        node: The element to process; may be None.
        name: Key into ``self.paragraph_counter``; also used as a prefix in
            log messages.

    Returns:
        The processed node. Comments, processing instructions, None and tags
        listed in ``SKIPPED_TAGS`` are returned without adding spans; a tag
        listed in ``SPECIAL_TAGS`` is returned wrapped in a new koboSpan.
    """
    # process node only if it is not a comment or a processing instruction
    if (
        node is None
        or isinstance(node, etree._Comment)
        or isinstance(node, etree._ProcessingInstruction)
    ):
        if node is not None:
            # the tail of a comment/PI is discarded here
            node.tail = None
        self.log.debug(f"[{name}] Skipping comment/ProcessingInstruction node")
        return node

    # Special case some tags
    # (the pattern captures the local tag name, ignoring a "{namespace}" prefix)
    special_tag_match = re.search(r"^(?:\{[^\}]+\})?(\w+)$", node.tag)
    if special_tag_match:
        # Skipped tags are just flat out skipped
        if special_tag_match.group(1) in SKIPPED_TAGS:
            self.log.debug(f"[{name}] Skipping '{special_tag_match.group(1)}' tag")
            return node

        # Special tags get wrapped in a span and their children are ignored
        if special_tag_match.group(1) in SPECIAL_TAGS:
            self.log.debug(
                f"[{name}] Wrapping '{special_tag_match.group(1)}' tag and "
                + "ignoring children"
            )
            span = etree.Element(
                f"{{{XHTML_NAMESPACE}}}span",
                attrib={
                    "id": f"kobo.{self.paragraph_counter[name]}.1",
                    "class": "koboSpan",
                },
            )
            span.append(node)
            return span

    # save node content for later
    node_text = node.text
    # the children are copied because node.clear() below detaches the originals
    node_children = deepcopy(node.getchildren())
    node_attrs = {}
    for key in list(node.keys()):
        node_attrs[key] = node.get(key)

    # reset current node, to start from scratch
    node.clear()

    # restore node attributes
    for key in node_attrs:
        node.set(key, node_attrs[key])

    # the node text is converted to spans
    if node_text is not None:
        if not self._append_kobo_spans_from_text(node, node_text, name):
            # didn't add spans, restore text
            node.text = node_text
        else:
            self.paragraph_counter[name] += 1

    # re-add the node children
    for child in node_children:
        # save child tail for later
        child_tail = child.tail
        child.tail = None
        node.append(self._add_kobo_spans_to_node(child, name))
        # the child tail is converted to spans
        if child_tail is not None:
            if not self._append_kobo_spans_from_text(node, child_tail, name):
                # didn't add spans, restore tail on last child
                node[-1].tail = child_tail
            else:
                self.paragraph_counter[name] += 1

    return node