def _get_single_selector_str(element: _Element) -> str:
    """Build a jQuery-style selector that pins down ``element`` as uniquely as possible.

    The first rule that applies wins:

    1. A non-empty ``id`` attribute gives ``#<id>``.
    2. A non-empty ``class`` attribute gives the tag name followed by the
       whitespace-separated class names, each prefixed with a dot.
    3. Any other attribute gives ``[<name>=<value>]``, built from the first
       attribute name reported by ``element.keys()``.
    4. With no attributes at all, the bare tag name is returned.
    """
    element_id = element.get('id')
    if element_id:
        return f'#{element_id}'

    class_attr = element.get('class')
    if class_attr:
        class_suffix = '.'.join(class_attr.split())
        return f'{element.tag}.{class_suffix}'

    attr_names = element.keys()
    if not attr_names:
        return element.tag

    # Only the first attribute is used; the value is emitted unquoted.
    first_attr = attr_names[0]
    return '[' + first_attr + '=' + element.get(first_attr) + ']'
def transform_element(self, element: Element, /):
    """Apply the transformation that corresponds to the tag of ``element``.

    * ``char`` elements are handed to ``self.transform_char``.
    * ``h1``, ``figcaption`` and ``a`` elements that carry a ``numbering``
      attribute are passed to ``self.expand_placeholder_in_element`` with
      the mode ``"numbering"``. An ``a`` element is then also wrapped in a
      newly created ``cite`` element, which takes over its position in the
      parent and its tail text.
    * Any other element is left untouched.
    """
    tag = element.tag
    if tag == "char":
        self.transform_char(element)
        return

    mode = "numbering"
    if tag not in ("h1", "figcaption", "a") or mode not in element.keys():
        return

    self.expand_placeholder_in_element(element, mode)
    if tag != "a":
        return

    # The trailing text moves to the wrapper before the swap so that it
    # stays after the anchor instead of ending up inside the <cite>.
    wrapper = element.makeelement("cite", {}, None)
    wrapper.tail, element.tail = element.tail, None  # type: ignore
    parent = element.getparent()
    parent.replace(element, wrapper)
    wrapper.append(element)
def get_field_mfd(sensor: etree._Element) -> dict:
    """Collect every attribute of a sensor element into a plain dictionary.

    Args:
        sensor: Element whose attributes are read.

    Returns:
        A new dict mapping each attribute name to its value; empty when the
        element has no attributes.
    """
    return dict(sensor.items())
def _add_kobo_spans_to_node(
    self, node: etree._Element, name: str
) -> etree._Element:
    """Rebuild ``node`` so that its text and its children's tails become Kobo spans.

    Handling depends on the kind of node:

    * ``None``, comments and processing instructions are returned as they
      are (a non-``None`` node has its tail cleared first).
    * Elements whose local tag name is in ``SKIPPED_TAGS`` are returned
      untouched.
    * Elements whose local tag name is in ``SPECIAL_TAGS`` are wrapped in a
      new XHTML ``span`` with class ``koboSpan`` and id
      ``kobo.<paragraph counter>.1``; their children are not processed.
    * Every other element is cleared and rebuilt in place: attributes are
      restored, the text is passed to ``_append_kobo_spans_from_text``, and
      deep copies of the former children are processed recursively and
      re-appended, each followed by its tail text handled the same way as
      the node text.

    ``self.paragraph_counter[name]`` is incremented each time
    ``_append_kobo_spans_from_text`` reports that it added spans.

    Args:
        node: Element to process; may be ``None``.
        name: Key into ``self.paragraph_counter``, also used as the prefix
            of the debug log messages.

    Returns:
        ``node`` itself, or the wrapping ``span`` for a special tag.
    """
    # process node only if it is not a comment or a processing instruction
    if node is None or isinstance(
        node, (etree._Comment, etree._ProcessingInstruction)
    ):
        if node is not None:
            node.tail = None
        self.log.debug(f"[{name}] Skipping comment/ProcessingInstruction node")
        return node

    # Special case some tags; the pattern strips an optional "{namespace}"
    # prefix so only the local tag name is compared.
    special_tag_match = re.search(r"^(?:\{[^\}]+\})?(\w+)$", node.tag)
    if special_tag_match:
        tag_name = special_tag_match.group(1)

        # Skipped tags are just flat out skipped
        if tag_name in SKIPPED_TAGS:
            self.log.debug(f"[{name}] Skipping '{tag_name}' tag")
            return node

        # Special tags get wrapped in a span and their children are ignored
        if tag_name in SPECIAL_TAGS:
            self.log.debug(
                f"[{name}] Wrapping '{tag_name}' tag and ignoring children"
            )
            span = etree.Element(
                f"{{{XHTML_NAMESPACE}}}span",
                attrib={
                    "id": f"kobo.{self.paragraph_counter[name]}.1",
                    "class": "koboSpan",
                },
            )
            span.append(node)
            return span

    # save node content for later; list(node) replaces the deprecated
    # getchildren() and yields the same child sequence
    node_text = node.text
    node_children = deepcopy(list(node))
    node_attrs = dict(node.items())

    # reset current node, to start from scratch
    node.clear()

    # restore node attributes
    for key, value in node_attrs.items():
        node.set(key, value)

    # the node text is converted to spans
    if node_text is not None:
        if self._append_kobo_spans_from_text(node, node_text, name):
            self.paragraph_counter[name] += 1
        else:
            # didn't add spans, restore text
            node.text = node_text

    # re-add the node children
    for child in node_children:
        # save child tail for later; it is detached so the recursive call
        # only sees the child's own content
        child_tail = child.tail
        child.tail = None
        node.append(self._add_kobo_spans_to_node(child, name))

        # the child tail is converted to spans
        if child_tail is not None:
            if self._append_kobo_spans_from_text(node, child_tail, name):
                self.paragraph_counter[name] += 1
            else:
                # didn't add spans, restore tail on last child
                node[-1].tail = child_tail

    return node