コード例 #1
0
def _get_single_selector_str(element: _Element) -> str:
    """Build a jQuery selector that identifies ``element`` as uniquely as possible.

    The first rule that applies wins:

    1. A non-empty ``id`` attribute gives ``#<id>``.
    2. A ``class`` attribute holding at least one class name gives
       ``<tag>.<class1>.<class2>...``.
    3. Any other attribute gives ``[<name>="<value>"]`` for the first
       attribute of the element.
    4. Otherwise the bare tag name is returned.

    Args:
        element: The lxml element to describe.

    Returns:
        The selector string for ``element``.
    """
    element_id = element.get('id')
    if element_id:
        return '#' + element_id

    # A whitespace-only class attribute has no class names; joining nothing
    # would produce an invalid selector such as "div.", so fall through.
    class_names = (element.get('class') or '').split()
    if class_names:
        return element.tag + '.' + '.'.join(class_names)

    attribute_names = list(element.keys())
    if attribute_names:
        first_name = attribute_names[0]
        value = element.get(first_name, '')
        # Always quote the value: unquoted values are only valid when they
        # are plain identifiers, which rules out "", "text/css", "a b", ...
        escaped = value.replace('\\', '\\\\').replace('"', '\\"')
        return '[' + first_name + '="' + escaped + '"]'

    return element.tag
コード例 #2
0
    def transform_element(self, element: Element, /):
        """Apply the tag-specific transformation to ``element``.

        ``char`` elements are handed to ``transform_char``. ``h1``,
        ``figcaption`` and ``a`` elements that carry a ``numbering``
        attribute get their placeholder expanded; an ``a`` element is then
        additionally wrapped in a new ``cite`` element that takes over its
        place under the parent and its tail text. Elements matching neither
        rule are left untouched.
        """
        tag = element.tag

        if tag == "char":
            self.transform_char(element)
            return

        mode = "numbering"
        if tag not in ("h1", "figcaption", "a") or mode not in element.keys():
            return

        self.expand_placeholder_in_element(element, mode)

        if tag != "a":
            return

        # Move the tail onto the wrapper before swapping so that the text
        # following the link ends up after the <cite>, not inside it.
        cite = element.makeelement("cite", {}, None)
        cite.tail = element.tail  # type: ignore
        element.tail = None  # type: ignore

        element.getparent().replace(element, cite)
        cite.append(element)
コード例 #3
0
def get_field_mfd(sensor: etree._Element) -> dict:
    """Return every attribute of the given sensor element as a dictionary.

    The result maps each attribute name reported by ``sensor.keys()`` to
    the value ``sensor.get()`` returns for it.
    """
    attribute_names = sensor.keys()
    return dict(zip(attribute_names, map(sensor.get, attribute_names)))
コード例 #4
0
    def _add_kobo_spans_to_node(
        self, node: etree._Element, name: str
    ) -> etree._Element:
        """Rebuild ``node`` so its text and its children's tails become Kobo spans.

        The node is emptied and refilled: its attributes are restored, its
        text is passed to ``_append_kobo_spans_from_text``, and each child
        is processed recursively and re-appended, followed by the child's
        tail text. Whenever the helper reports that it added spans,
        ``self.paragraph_counter[name]`` is incremented; otherwise the
        original text or tail is put back unchanged.

        Args:
            node: Element to process. ``None``, comments and processing
                instructions are returned without processing (an existing
                comment/PI only has its tail cleared).
            name: Key into ``self.paragraph_counter``; also used as the
                prefix of the debug log messages.

        Returns:
            ``node`` itself, except for tags listed in ``SPECIAL_TAGS``,
            where a new ``koboSpan`` span element wrapping ``node`` is
            returned instead.
        """
        # Comments and processing instructions carry no text to convert.
        not_processable = node is None or isinstance(
            node, (etree._Comment, etree._ProcessingInstruction)
        )
        if not_processable:
            if node is not None:
                node.tail = None
            self.log.debug(f"[{name}] Skipping comment/ProcessingInstruction node")
            return node

        # Some tags are special-cased by local name; the optional
        # "{namespace}" prefix of the tag is ignored by the pattern.
        tag_match = re.search(r"^(?:\{[^\}]+\})?(\w+)$", node.tag)
        if tag_match:
            local_name = tag_match.group(1)

            # Skipped tags are returned exactly as they came in.
            if local_name in SKIPPED_TAGS:
                self.log.debug(f"[{name}] Skipping '{local_name}' tag")
                return node

            # Special tags are wrapped in a single span; their children are
            # not descended into.
            if local_name in SPECIAL_TAGS:
                self.log.debug(
                    f"[{name}] Wrapping '{local_name}' tag and ignoring children"
                )
                wrapper = etree.Element(
                    f"{{{XHTML_NAMESPACE}}}span",
                    attrib={
                        "id": f"kobo.{self.paragraph_counter[name]}.1",
                        "class": "koboSpan",
                    },
                )
                wrapper.append(node)
                return wrapper

        # Snapshot everything needed to rebuild the node before wiping it.
        original_text = node.text
        copied_children = deepcopy(node.getchildren())
        saved_attrs = {attr: node.get(attr) for attr in list(node.keys())}

        # Start from an empty node and put the attributes back.
        node.clear()
        for attr, value in saved_attrs.items():
            node.set(attr, value)

        # Leading text: converted to spans, or restored verbatim when the
        # helper did not add any.
        if original_text is not None:
            if self._append_kobo_spans_from_text(node, original_text, name):
                self.paragraph_counter[name] += 1
            else:
                node.text = original_text

        # Children go back in order, each followed by its (converted) tail.
        for child in copied_children:
            # Detach the tail so it is handled here rather than travelling
            # along with the child.
            tail_text = child.tail
            child.tail = None
            node.append(self._add_kobo_spans_to_node(child, name))

            if tail_text is None:
                continue
            if self._append_kobo_spans_from_text(node, tail_text, name):
                self.paragraph_counter[name] += 1
            else:
                # No spans were added: the tail belongs to the last child.
                node[-1].tail = tail_text

        return node