def get_node_coordinates(node: "etree._Element", root: "etree._Element" = None,
                         node_id: str = None) -> "Tuple[float, float]":
    '''
    Find a node's coordinates.

    :param node: etree node for which to get coordinates
    :param root: etree root (optional, needed only for <way /> tags)
    :param node_id: optional parameter, the <node /> id reference from where
        to look for coordinates
    :return: a tuple (lat, lon); (-1, -1) for unsupported tags, (-2, -2) when
        a <way /> cannot be resolved (no root given, or the referenced
        <node /> is missing from it)
    '''
    if node.tag == 'node':
        return float(node.attrib['lat']), float(node.attrib['lon'])
    if node.tag == 'way':
        if node_id is None:
            # a <way /> references its member nodes via <nd ref="..."/>
            # children; default to the first one
            node_id = list(node)[0].attrib['ref']
        if root is not None:
            for child in root:
                if child.tag == 'node' and child.attrib['id'] == node_id:
                    return float(child.attrib['lat']), float(child.attrib['lon'])
        # was: fell through and implicitly returned None when root was given
        # but the referenced node was absent; return the error tuple instead
        return -2, -2
    return -1, -1
def rewrap(tag: str, source: Element) -> Element:
    """Copy all the contents of one element into another.

    Creates a fresh element named `tag`, copies `source`'s text, and deep
    copies every direct child so the new tree shares no nodes with `source`.
    """
    new = element(tag)
    new.text = source.text
    # iterating the element directly replaces the deprecated getchildren()
    for child in source:
        new.append(deepcopy(child))
    return new
def getMethodNamesDict(elem: Element) -> Dict[str, str]:
    """Collect method names from `elem`.

    If `elem` itself is a method element it alone is inspected; otherwise
    each direct child is inspected. Results are accumulated by
    `getMethodElemNamesDict` into the returned dict.
    """
    # NOTE(review): the original inline type comment said Dict[str, Element],
    # contradicting the declared Dict[str, str] return type -- the declared
    # signature is followed here; confirm against getMethodElemNamesDict
    methods = {}  # type: Dict[str, str]
    if getElemTag(elem) == "method":
        getMethodElemNamesDict(elem, methods)
        return methods
    for child in elem:
        getMethodElemNamesDict(child, methods)
    return methods
def get_node_name(node: "etree._Element") -> str:
    '''
    Find a node's name.

    :param node: etree node for which to find a name (of a place)
    :return: the value of the <tag k="name" v="..."/> child, or None when
        the node carries no name tag
    '''
    for child in node:
        # attrib.get avoids the separate 'k' in keys() membership test
        if child.attrib.get('k') == 'name':
            return child.attrib['v']
    return None
def walk(e: "etree._Element", l: "ModelListener") -> None:
    """Depth-first traversal of an element tree firing listener hooks.

    For each element, fires enter_every_before / enter_<tag> /
    enter_every_after before recursing into the children, and the matching
    exit_* triple afterwards.

    :param e: element subtree root to walk
    :param l: listener whose `call(hook_name, element)` receives each event
    """
    tag = e.tag
    l.call('enter_every_before', e)
    l.call('enter_' + tag, e)
    l.call('enter_every_after', e)
    # iterating the element directly replaces the deprecated getchildren()
    for child in e:
        walk(child, l)
    l.call('exit_every_before', e)
    l.call('exit_' + tag, e)
    l.call('exit_every_after', e)
def getParamCompleter(elem: Element) -> Optional[WordCompleter]:
    """Build a word completer from `elem`'s <option value="..."/> children.

    Empty or missing `value` attributes are skipped. Returns None when no
    usable option values are found, so the caller can omit completion.
    """
    options = []  # type: List[str]
    for child in elem:
        if getElemTag(child) == "option":
            value = child.get("value", None)
            if value:
                options.append(value)
    if not options:
        return None
    return WordCompleter(
        options,
        ignore_case=False,
    )
def elemChildOptions(
        elem: Element) -> Tuple[Dict[str, Element], Dict[str, Element], ]:
    """Map selectable key strings to `elem`'s child elements.

    Returns (options, optionsMinimal):
      * optionsMinimal maps only the first key of each child to that child,
      * options maps every key and its lowercase variant, skipping keys
        whose path (sans slashes) is generalized by id.
    Children with no keys are ignored.
    """
    options = {}  # type: Dict[str, Element]
    optionsMinimal = {}  # type: Dict[str, Element]
    for child in elem:
        keys = elemKeys(child, elem)
        if not keys:
            continue
        optionsMinimal[keys[0]] = child
        for key in keys:
            # id-generalized paths are not offered as direct options
            if key.strip("/") in pathGeneralizeIdByPath:
                continue
            options[key] = child
            options[key.lower()] = child
    return options, optionsMinimal
def get_element_str(html: "_Element") -> str:
    """Concatenate the visible text of an element, one level deep.

    Collects the element's own text, each direct child's text and tail, and
    the element's tail (grandchildren are intentionally not descended into),
    then strips surrounding whitespace.
    """
    parts = []
    if html.text:
        parts.append(html.text)
    for child in html:
        if child.text:
            parts.append(child.text)
        if child.tail:
            parts.append(child.tail)
    if html.tail:
        parts.append(html.tail)
    return ''.join(parts).strip()
def askJsonParams(
    self,
    requestElem: Element,
    path: str,
    data: Dict[str, Any],
) -> Optional[str]:
    """
    Recursive function to ask all json/body parameters.

    Updates the `data` argument in place with each parsed value.
    Returns an error message string, or None on success.
    """
    # FIXME: do we need the path
    for child in requestElem:
        t = getElemTag(child)
        if t != "param":
            continue
        name = child.get("name", "")
        if not name:
            # was: print("WARNING: ... %r ...", child, t) -- print() does not
            # substitute %-style placeholders, so the message was garbled
            print(f"WARNING: element {child!r} with tag {t!r} has no name")
            continue
        typ = child.get("type", "")
        if not typ:
            print(f"WARNING: element {child!r} with tag {t!r} has no type")
            continue
        completer = getParamCompleter(child)
        multiline = child.get("multiline", "") == "true"
        history = None
        # secret parameters must not be persisted to a history file
        if child.get("secret", "") != "true":
            history = FileHistory(self.paramHistoryPath(name))
        try:
            valueRaw = prompt(
                f"> Parameter: {name} = ",
                multiline=multiline,
                history=history,
                auto_suggest=AutoSuggestFromHistory(),
                completer=completer,
            )
        except KeyboardInterrupt:
            return "Canceled"
        # empty input means "skip this parameter"
        if valueRaw != "":
            value, err = parseInputValue(valueRaw, typ)
            if err:
                return err
            data[name] = value
    return None
def printElem(elem: Element, level: int):
    """Recursively pretty-print an element tree, one line per element,
    indented by nesting level, with a per-tag one-line summary."""
    tag = getElemTag(elem)
    # reuse the tag already computed instead of calling getElemTag twice
    prefix = indent * level + tag
    if tag == "resource":
        print(f"{prefix}: {elemPath(elem)}")
    elif tag == "method":
        print(f"{prefix}: {elemName(elem)} ({elemID(elem)})")
    elif tag == "param" or tag == "element":
        print(f"{prefix}: {elemName(elem)} (type={elemType(elem)})")
    elif tag == "item":
        print(f"{prefix} (type={elemType(elem)})")
    elif tag == "option":
        print(f"{prefix}: {elemValue(elem)}")
    elif tag == "representation":
        # representations get no line of their own, only their children
        pass
    else:
        print(prefix)
    for child in elem:
        printElem(child, level + 1)
def __init__(self, doc: Element):
    """Populate instance attributes from an XML element, driven by this
    class's annotations.

    For each annotated attribute name/annotation pair:
      * combinator annotations are called with the element's children,
      * otherwise a matching XML attribute is parsed via the annotation,
      * entity annotations fall back to the text of the first child whose
        tag matches the attribute name,
      * remaining attributes fall back to an existing instance value, or None.
    """
    children = doc.getchildren()
    for key, value in self.__annotations__.items():
        if is_combinator(value):
            # combinator: builds the value from the full child list
            setattr(self, key, value(children))
        else:
            if key in doc.attrib:
                # plain XML attribute: annotation acts as the converter
                setattr(self, key, value(doc.attrib[key]))
            elif is_entity(value):
                # entity: use the text of the first matching child tag
                # NOTE(review): child.text.strip() raises if the matching
                # child has no text -- presumably entities always carry
                # text; confirm against the callers
                dummy = DummyXMLElement(key)
                for child in filter(dummy.match_tag, children):
                    setattr(self, key, value(child.text.strip()))
                    break
                else:
                    # for/else: no matching child found
                    setattr(self, key, None)
            elif key in self.__dict__:
                # keep a value already set on the instance
                setattr(self, key, self.__dict__[key])
            else:
                setattr(self, key, None)
def uninent_block_literal_el(el: ET._Element):
    """Unindent the literal inner XML content of `el` and return a
    re-parsed element (or `el` itself when it has no content).

    (The typo in the function name is kept for backward compatibility.)
    """
    # empty element with no text: nothing to unindent
    if len(el) == 0 and el.text is None:
        return el

    def get_inner_content_xml(el_):
        # serialize the whole element, then split off the opening and
        # closing tags to get at the raw inner content
        s = ET.tostring(el_, encoding="unicode", xml_declaration=False)
        # lxml provides no way to extract literal XML of node content
        # excluding the node itself
        m = re.match(r'([^>]*>)(.+)(<[^>]+>)\s*$', s, re.DOTALL)
        return m.groups()

    start, contents, end = get_inner_content_xml(el)
    lines = rm_lead_trail_empty_lines(contents)
    lines = unindent(lines)
    xml_s = start + "\n".join(lines) + end
    xml = ET.XML(xml_s)
    return xml
def xml_to_tree(x: "etree._Element", lvl: int = 1, preserve_ns: bool = False,
                text_strip: bool = True):
    """Yield one formatted line per element of the tree, depth-first.

    :param x: element subtree root
    :param lvl: current depth, controls the leading indent
    :param preserve_ns: keep "{namespace}" prefixes on tag names
    :param text_strip: strip whitespace around element text
    """
    if preserve_ns:
        tag = x.tag
    else:
        # drop a leading "{namespace}" prefix from the tag name
        tag = re.sub(r'\{.*\}(.*)', r'\1', x.tag)
    if text_strip and x.text:
        text = x.text.strip()
    else:
        text = x.text if x.text else ""
    # NOTE(review): the two halves are concatenated with no separator, so
    # Tag and Text land on one line -- preserved as-is; a newline between
    # them may have been intended
    yield f"{' ' * (lvl - 1)}|--Tag: {tag:<}" \
          f"{' ' * (lvl - 1)}| Text: {text:<}"
    for child in x:
        yield from xml_to_tree(child, lvl + 1, preserve_ns, text_strip)
def getChildrenWithTag(elem: Element, tag: str) -> "List[Element]":
    """Return the direct children of `elem` whose tag equals `tag`.

    (The return annotation was `Element` but the function has always
    returned a list; corrected to List[Element].)
    """
    return [child for child in elem if getElemTag(child) == tag]
def _process_elem(self, parent_state: PTState, t_elem: etree._Element):
    """Process one template element against its source document(s).

    Depending on the element's pt:* attributes this may duplicate the
    subtree per source, fetch attributes/text from the matching source
    element, queue deferred required/fill evaluations, or remove the
    element entirely. Recurses into children.
    """
    # comments carry no pt:* semantics; skip them
    if isinstance(t_elem, etree._Comment):
        return
    self._ext.set_elem_context(t_elem)
    qname = etree.QName(t_elem.tag)
    # child state inherits unspecified settings from the parent state
    state = PTState(parent_state, t_elem)
    if state["reorder"]:
        self._reorder.append(state)
    # duplicate subtree for each source
    if len(state["sources"].secondary):
        # prevent triggering this processing branch on sibling passes
        del t_elem.attrib[self._pt_clark("sources")]
        # We temporarily detach the t_elem subtree and insert each elem subtree at
        # the original location of t_elem before populating, which ensures that
        # resolved paths are always in the form /path/to/elem[1]/child, which will
        # match corresponding source elements (e.g. /path/to/elem/child) in the
        # multi source fetch scenario. Caveat: downstream deferred pt:fill or
        # pt:required will be evaluated in the context of their element's final
        # path (e.g. /path/to/elem[3]/child).
        #
        # Inserting and populating the subtrees in reverse order ensures that their
        # final document order for multi source fetches is aligned with the order of
        # the source_map sources.
        parent = t_elem.getparent()
        idx = parent.index(t_elem)
        parent.remove(t_elem)
        for source in reversed(
            (state["sources"].primary, *state["sources"].secondary)
        ):
            # the primary source reuses the original subtree; each secondary
            # source gets its own deep copy
            elem = (
                t_elem if source is state["sources"].primary else deepcopy(t_elem)
            )
            state["sources"] = SourceGroup(source)
            parent.insert(idx, elem)
            self._process_elem(state, elem)
        return
    if state["fetch"]:
        # locate the source element(s) matching this template element's path
        path = self.label.getelementpath(t_elem)
        s_elems = state["sources"].primary.findall(path)
        if len(s_elems) > 1:
            if state["multi"] is not True and len(s_elems) != state["multi"]:
                raise PTFetchError(
                    f"{len(s_elems)} source elements found but pt:multi is set to"
                    f" expect {int(state['multi'])}",  # cast False to 0 for readability
                    t_elem,
                )
            self._process_multi_branch(t_elem, parent_state, len(s_elems) - 1)
            return
        elif not len(s_elems):
            # nothing matched in the source: either a hard error (required)
            # or the template element is dropped
            if state["required"]:
                url = state["sources"].primary.docinfo.URL
                source_file = (
                    Path(url).name if url is not None else "<unresolved filename>"
                )
                raise PTFetchError(
                    f"{qname.localname} could not be located at path {path} in"
                    f" source {state.exp['sources']} from {source_file}",
                    # FIXME: .exp is None in descendants where source is inherited...
                    t_elem,
                )
            t_elem.getparent().remove(t_elem)
            return
        elif not len(t_elem):  # len(s_elems) == 1:
            # leaf template element: pull attributes and text from the source
            t_elem.attrib.update(s_elems[0].attrib)
            t_elem.text = s_elems[0].text
    else:
        if isinstance(state["multi"], int) and state["multi"] > 1:
            self._process_multi_branch(t_elem, parent_state, state["multi"] - 1)
            return
        # non-fetch required condition; should be evaluated at export
        if state.exp["required"] is not None:
            self._deferred_reqs.append(state)
    if len(t_elem):
        for child_elem in t_elem.getchildren():
            self._process_elem(state, child_elem)
    elif state.exp["fill"]:
        # leaf with a fill expression: evaluate now or defer to a later pass
        if state["defer"]:
            self._deferred_fills.append(state)
        else:
            self._handle_fill(state.t_elem, state.eval_deferred("fill"))
    state.remove_elem_pt_attrs()
def _parse_sentence_to_token_sequence(self, xml_node_sentence: _Element) -> List[Tuple[str]]: ret = list(map(self._parse_token, xml_node_sentence.getchildren())) return ret
def _xml_node_to_dict_value(self, node: ET._Element, item_dict_nodes=None):
    """
    Recursively convert an XML node into a key-value pair for a dict.

    @param {ET._Element} node - node to convert into a key-value pair
    @param {list} item_dict_nodes = None - for list/tuple values, the nodes
        (Elements) whose list items should be wrapped in a dict
    @return {tuple} - (key, value); returns (None, None) for comments
    """
    if not (hasattr(node, 'tag') and type(node.tag) == str):
        # not a real element (e.g. a comment): return (None, None)
        return None, None
    _key = node.tag
    _value = None
    _type = None
    # determine the declared data type from the "type" attribute
    if 'type' in node.attrib.keys():
        _type = node.attrib['type']
        if _type not in ('dict', 'list', 'tuple', 'bool', 'int', 'float', 'string'):
            # non-standard type: treat it as absent and infer below
            _type = None
    _childs = None  # cached child element list
    if _type is None:
        _childs = node.getchildren()
        if len(_childs) > 0:
            # any repeated child tag means the value must be a list
            _tag_list = list()
            _type = 'dict'
            for _childnode in _childs:
                if not (hasattr(_childnode, 'tag') and type(_childnode.tag) == str):
                    # skip comments
                    continue
                if _childnode.tag in _tag_list:
                    # found a node with a repeated tag
                    _type = 'list'
                    break
                else:
                    _tag_list.append(_childnode.tag)
        else:
            _type = 'string'
    # convert according to the resolved type
    _text = node.text
    if type(_text) == str:
        _text = node.text.strip()
    if _type == 'string':
        _value = _text
    elif _type == 'bool':
        _value = (_text == 'true')
    elif _type == 'int':
        # round(float(...)) also accepts values written as "3.0"
        _value = round(float(_text))
    elif _type == 'float':
        _value = float(_text)
    elif _type == 'dict':
        # dict value
        if _childs is None:
            _childs = node.getchildren()
        if len(_childs) > 0:
            # has child nodes: recurse into each one
            _value = dict()
            for childnode in node.getchildren():
                _child_key, _child_value = self._xml_node_to_dict_value(
                    childnode, item_dict_nodes=item_dict_nodes)
                # add to the dict
                if _child_key is not None:
                    _value[_child_key] = _child_value
        else:
            # no children: treat the same as string
            _value = _text
    else:
        # list or tuple
        if _type == 'tuple':
            _value = tuple()
        else:
            _value = list()
        # decide whether list items should be wrapped in a dict
        _use_dict = False
        if item_dict_nodes is not None:
            for _node in item_dict_nodes:
                if _node is node:
                    _use_dict = True
                    break
        # build the list items
        for childnode in node.getchildren():
            _child_key, _child_value = self._xml_node_to_dict_value(
                childnode,
                item_dict_nodes=item_dict_nodes)
            if _child_key is None and _child_value is None:
                # skip comments
                continue
            if _use_dict:
                # wrap the list item in a single-entry dict
                _child_value = {_child_key: _child_value}
            # append to the list
            # NOTE(review): for _type == 'tuple', tuple has no append --
            # presumably tuple-typed nodes always take another branch or the
            # result is post-processed; confirm with callers
            _value.append(_child_value)
    # return this node's key and value
    if _value is None:
        _value = ''
    return _key, _value
def get_nth_child(elem: "_Element", n: int) -> "_Element":
    """Return the nth (0-based) direct child of an element.

    Negative `n` indexes from the end; raises IndexError when the element
    has no child at that position. Indexing the element directly avoids
    materializing the full (deprecated) getchildren() list.
    """
    return elem[n]
def _parse_sentence_to_seqs_of_token_attrs(self, xml_node_sentence: _Element) -> Tuple[Tuple[str]]: ret = tuple(zip(*map(self._parse_token, xml_node_sentence.getchildren()))) return ret
def _add_kobo_spans_to_node(
    self, node: etree._Element, name: str
) -> etree._Element:
    """Recursively wrap the text content of `node` in numbered koboSpan
    spans (via `self._append_kobo_spans_from_text`), returning the node
    (or a new wrapping span) to be placed at the node's position.

    `name` identifies the document section and keys the per-section
    paragraph counter used for span ids.
    """
    # process node only if it is not a comment or a processing instruction
    if (
        node is None
        or isinstance(node, etree._Comment)
        or isinstance(node, etree._ProcessingInstruction)
    ):
        if node is not None:
            # drop the tail so the skipped node leaves no stray text behind
            node.tail = None
        self.log.debug(f"[{name}] Skipping comment/ProcessingInstruction node")
        return node

    # Special case some tags
    # strip an optional "{namespace}" prefix and capture the bare tag name
    special_tag_match = re.search(r"^(?:\{[^\}]+\})?(\w+)$", node.tag)
    if special_tag_match:
        # Skipped tags are just flat out skipped
        if special_tag_match.group(1) in SKIPPED_TAGS:
            self.log.debug(f"[{name}] Skipping '{special_tag_match.group(1)}' tag")
            return node

        # Special tags get wrapped in a span and their children are ignored
        if special_tag_match.group(1) in SPECIAL_TAGS:
            self.log.debug(
                f"[{name}] Wrapping '{special_tag_match.group(1)}' tag and "
                + "ignoring children"
            )
            span = etree.Element(
                f"{{{XHTML_NAMESPACE}}}span",
                attrib={
                    "id": f"kobo.{self.paragraph_counter[name]}.1",
                    "class": "koboSpan",
                },
            )
            span.append(node)
            return span

    # save node content for later
    node_text = node.text
    node_children = deepcopy(node.getchildren())
    node_attrs = {}
    for key in list(node.keys()):
        node_attrs[key] = node.get(key)

    # reset current node, to start from scratch
    node.clear()

    # restore node attributes
    for key in node_attrs:
        node.set(key, node_attrs[key])

    # the node text is converted to spans
    if node_text is not None:
        if not self._append_kobo_spans_from_text(node, node_text, name):
            # didn't add spans, restore text
            node.text = node_text
        else:
            self.paragraph_counter[name] += 1

    # re-add the node children
    for child in node_children:
        # save child tail for later
        child_tail = child.tail
        child.tail = None
        node.append(self._add_kobo_spans_to_node(child, name))
        # the child tail is converted to spans
        if child_tail is not None:
            if not self._append_kobo_spans_from_text(node, child_tail, name):
                # didn't add spans, restore tail on last child
                node[-1].tail = child_tail
            else:
                self.paragraph_counter[name] += 1

    return node