def _get_stop(self):
    """Resolve the stop reference of this element and register it if needed.

    The reference is selected from ``self._elements`` and wrapped in a
    ``StopWithoutLineStr``. The ``stops_without_line`` mapping of the ZODB
    root gets a new ``StopWithoutLine`` entry when the reference is unknown,
    and a location when the stored entry has none. The reference is finally
    stored in ``self._stop``.

    NOTE(review): the database and connection opened here are neither closed
    nor committed inside this method - presumably handled elsewhere; confirm.
    """
    # TODO: use complex xpath
    stop_ref = StopWithoutLineStr(
        select(self._elements,
               XPaths.stop_name_ref,
               namespaces=self._namespaces))

    database: ZODB.DB = ZODB.config.databaseFromString(zodb_conf)
    conn: Connection = database.open()
    known_stops: DBStopWithoutLine = conn.root().stops_without_line

    # Unknown reference: create the entry from the selected stop name.
    if stop_ref not in known_stops.keys():
        stop_name = select(self._elements,
                           XPaths.stop_name_name,
                           namespaces=self._namespaces)
        known_stops[stop_ref] = StopWithoutLine(stop_ref, stop_name)

    # Entry without coordinates: fill them in from this element.
    if not known_stops[stop_ref].has_location():
        location = {
            'latitude': select(self._elements,
                               XPaths.stop_name_lat,
                               namespaces=self._namespaces),
            'longitude': select(self._elements,
                                XPaths.stop_name_lon,
                                namespaces=self._namespaces),
        }
        known_stops[stop_ref].set_location(location)

    self._stop = stop_ref
def parseKML(self):
    """Parse the XML content of a MOSMIX .kml file into ``self.DataTable``.

    Reads the issue time, the forecast time steps and every ``dwd:Forecast``
    value series from ``self._kml`` (using the ``self._kmlNS`` namespaces).
    On success ``self.IssueTime`` holds the issue time string and
    ``self.DataTable`` a pandas DataFrame with one float column per forecast
    element, indexed by the time steps (index name ``'PeriodEnd'``).

    Returns:
        bool: ``True`` when the table was built, ``False`` when no document
        is loaded (``self._kml`` is ``None``).

    Note:
        Any exception raised while parsing is printed and the process is
        terminated with ``sys.exit(1)``.
    """
    if self._kml is None:
        return False

    try:
        issue_time = elementpath.select(self._kml,
                                        '//dwd:IssueTime/text()',
                                        self._kmlNS)[0]
        # Same textual format pandas eventually outputs for the time steps.
        # Plain string replacement: the former regex '.000Z' had an
        # unescaped dot and therefore matched any character before '000Z'.
        self.IssueTime = issue_time.replace('T', ' ').replace('.000Z', '+00:00')

        period_end = elementpath.select(
            self._kml, '//dwd:ForecastTimeSteps/dwd:TimeStep/text()',
            self._kmlNS)
        para_names = elementpath.select(
            self._kml, '//dwd:Forecast/@dwd:elementName', self._kmlNS)
        value_nodes = elementpath.select(
            self._kml, '//dwd:Forecast/dwd:value', self._kmlNS)

        if len(para_names) != len(value_nodes):
            raise Exception("ERROR --- length mismatch in parseKML()")

        weather_data = {}
        for param, node in zip(para_names, value_nodes):
            # A token consisting of a lone '-' marks a missing value. Only
            # that token becomes NaN; replacing every '-' in the text (as
            # was done before) also destroyed the sign of negative numbers
            # and exponents such as '1e-3'.
            values = [
                float('nan') if token == '-' else float(token)
                for token in node.text.split()
            ]
            # np.asfarray was removed in NumPy 2.0; asarray is equivalent.
            weather_data[param] = np.asarray(values, dtype=float)

        table = pd.DataFrame(weather_data, index=pd.DatetimeIndex(period_end))
        table.index.name = 'PeriodEnd'  # Time is in UTC
        self.DataTable = table
    except Exception as e:
        print("parseKLM: " + str(e))
        sys.exit(1)
    return True
def concat_not_working_workaround(xpath: str,
                                  tree: ElementTree.ElementTree, **kwargs):
    """Emulate a trailing ``/concat(...)`` step of an XPath expression.

    The expression is split at ``'/concat('`` into an origin path and the
    comma separated concat arguments. For every node selected by the origin
    path, each argument is either a quoted literal (quotes are stripped) or
    a sub-expression evaluated relative to that node. The pieces are then
    joined row by row: arguments with several results contribute their
    row-th result, arguments with exactly one result repeat it on every row,
    and arguments without results contribute nothing.

    :param xpath: expression containing exactly one ``'/concat('`` and
        ending with the closing parenthesis.
    :param tree: tree the origin path is evaluated on.
    :param kwargs: forwarded unchanged to every ``select`` call.
    :return: list of concatenated strings over all origin nodes.
    """
    origin, raw_arguments = xpath.split('/concat(')
    # Drop the closing parenthesis before splitting into single arguments.
    arguments: List[str] = raw_arguments[:-1].split(', ')

    output: List[str] = []
    for region in select(tree, origin, **kwargs):
        columns: List[List[str]] = []
        for argument in arguments:
            if "'" in argument:
                # Quoted literal: use its text verbatim.
                columns.append([argument.replace("'", '')])
                continue
            selected = select(region, argument, **kwargs)
            columns.append(
                selected if isinstance(selected, list) else [selected])

        row_count: int = max((len(column) for column in columns), default=0)
        for row in range(row_count):
            output.append(''.join(
                column[row] if len(column) > 1 else column[0]
                for column in columns
                if column))
    return output
def get_document_id(self):
    """Return the ``extension`` of the first matching document ``id`` element.

    The known id roots are tried in order; the first one that yields a
    result wins. Raises ``IndexError`` when none of them matches.
    """
    # TODO: define all possible Document ID xPaths
    candidate_xpaths = (
        """/*/id[@root="1.2.40.0.34.99.4613.3.1"]/@extension""",
        """/*/id[@root="1.2.40.0.10.1.4.3.4.2.1"]/@extension""",
    )
    matches = []
    for candidate in candidate_xpaths:
        matches = elementpath.select(self.root, candidate, self.namespaces)
        if len(matches) != 0:
            break
    return matches[0]
def to_dict(self, xpath=None, namespaces=None, **kwargs):
    """
    Convert XML nodes into a dictionary
    Note: attribute values of the XML are not handled by this function

    @param {string} xpath=None - XPath search path, None means the root node
    @param {dict} namespaces=None - namespaces used for the search
    @param {**kwargs} kwargs - extended parameters, including:
        item_dict_xpaths = None - {dict} - for list and tuple nodes, the xPaths of the
            nodes whose list items are to be produced as dictionaries
            key - xPath of the node, value - namespaces for that search (None or dict)
            Note: the xPath addresses the list node (not the list item nodes) and is
            evaluated starting from the root node

    @return {dict} - the converted dictionary
        Note: the node itself is included, e.g. <data><a>val1</a><b>val2</b></data>
        is accessed starting with dict['data']
    """
    # Collect the nodes named by item_dict_xpaths (if given)
    item_nodes = None
    item_xpaths = kwargs.get('item_dict_xpaths')
    if item_xpaths is not None:
        item_nodes = []
        for item_xpath, item_namespaces in item_xpaths.items():
            if self.use_xpath2:
                # xpath2.0
                found = elementpath.select(
                    self.root, item_xpath, namespaces=item_namespaces)
            else:
                # xpath1.0
                found = self.root.xpath(item_xpath, namespaces=item_namespaces)
            item_nodes = item_nodes + found

    # Determine the nodes to convert
    if xpath is None:
        start_nodes = [self.root]
    elif self.use_xpath2:
        # xpath2.0
        start_nodes = elementpath.select(self.root, xpath, namespaces=namespaces)
    else:
        # xpath1.0
        start_nodes = self.root.xpath(xpath, namespaces=namespaces)

    # Build the dictionary, skipping missing nodes and nodes without a key
    converted = {}
    for node in start_nodes:
        if node is None:
            continue
        key, value = self._xml_node_to_dict_value(node, item_dict_nodes=item_nodes)
        if key is not None:
            converted[key] = value
    return converted
def test_select_function(self):
    """Check ``select`` for a text step and for variable references."""
    text_nodes = select(self.root, 'text()')
    self.assertListEqual(text_nodes, ['Dickens'])

    untyped = select(self.root, '$a', variables={'a': 1})
    self.assertEqual(untyped, 1)

    typed = select(self.root,
                   '$a',
                   variables={'a': 1},
                   variable_types={'a': 'xs:decimal'})
    self.assertEqual(typed, 1)
def get_patient_name(self):
    """Select the patient's given names and family name.

    Returns a dict with the keys ``'vornamen'`` (given names) and
    ``'nachname'`` (family name without a ``qualifier`` attribute); each
    value is the result returned by ``elementpath.select``.
    """
    given_xpath = """//recordTarget/patientRole/patient/name/given/text()"""
    family_xpath = """//recordTarget/patientRole/patient/name/family[not(@qualifier)]/text()"""
    return {
        'vornamen': elementpath.select(self.root, given_xpath, self.namespaces),
        'nachname': elementpath.select(self.root, family_xpath, self.namespaces),
    }
def _get_cords(self):
    """Read latitude and longitude from ``self._elements``.

    ``self._lat`` / ``self._lon`` are set to the first selected value
    converted to ``float``, or to ``None`` when the selection is empty.
    """
    # TODO: Use complex Xpaths
    latitudes = select(self._elements,
                       XPaths.location_lat,
                       namespaces=self._namespaces)
    if latitudes:
        self._lat = float(latitudes[0])
    else:
        self._lat = None

    longitudes = select(self._elements,
                        XPaths.location_lon,
                        namespaces=self._namespaces)
    if longitudes:
        self._lon = float(longitudes[0])
    else:
        self._lon = None
def extract(self) -> dict:
    """
    Extracts data from the HTML tree using the configured XPath expressions

    ``self.single`` maps a key to an expression whose joined result is stored
    directly; ``self.multiple`` maps keys to expressions that yield one value
    per repeated item. An entry is either an expression string or a list
    ``[expression, arg]``: when ``arg`` equals ``self.FORCE_LXML`` the
    expression is run with ``self.tree.xpath``, otherwise with
    ``elementpath.select`` and ``arg`` is handed on to ``self.joiner``.

    Results are cached in ``self.extracted``; a non-empty cache is returned
    as is.

    :return: JSON formed dict of extracted data
    """
    if self.extracted:
        # extraction was already performed
        return self.extracted

    # extract values that appear only once
    if self.single:
        for key, val in self.single.items():
            if not isinstance(val, list):
                self.extracted[key] = self.joiner(
                    elementpath.select(self.tree, val))
            elif val[1] == self.FORCE_LXML:
                self.extracted[key] = self.joiner(self.tree.xpath(val[0]))
            else:
                self.extracted[key] = self.joiner(
                    elementpath.select(self.tree, val[0]), val[1])

    # extract values that appear multiple times
    if self.multiple:
        per_key = {}
        # As before, the item count is taken from the last configured entry;
        # all entries are assumed to yield the same number of results.
        item_count = 0
        for key, val in self.multiple.items():
            if not isinstance(val, list):
                current = elementpath.select(self.tree, val)
                item_count = len(current)
            elif val[1] == self.FORCE_LXML:
                current = self.tree.xpath(val[0])
                item_count = len(current)
            else:
                matches = elementpath.select(self.tree, val[0])
                # Keep the joiner argument next to the results. The count
                # must come from the results: len() of this 2-tuple (used
                # previously) was always 2.
                current = (matches, val[1])
                item_count = len(matches)
            per_key[key] = current

        # build one dict per extracted data item
        items = []
        for i in range(item_count):
            item = {}
            for key, found in per_key.items():
                if isinstance(found, tuple):
                    item[key] = self.joiner(found[0], found[1], i)
                else:
                    item[key] = self.joiner(found, i=i)
            items.append(item)

        title = 'Items' if self.multiple_title is None else self.multiple_title
        self.extracted[title] = items

    return self.extracted
def get_attr(self, xpath, attr_name, default='', namespaces=None):
    """
    Get an attribute value of the node addressed by xpath

    @param {string} xpath - XPath search path
        Note: (1) with namespaces given, their keys can be used as prefixes, e.g. 'real_person:actor'
        (2) an xPath not starting with '/' is searched from the root node
    @param {string} attr_name - attribute name
    @param {string} default='' - value returned when the node or the attribute is not found
    @param {dict} namespaces=None - namespaces, for example:
        ns = {
            'real_person': 'http://people.example.com',
            'role': 'http://characters.example.com'
        }

    @return {string} - the attribute text of the first matching node, or default when
        no node matches or the attribute is missing
    """
    if self.use_xpath2:
        # xpath2.0
        matches = elementpath.select(self.root, xpath, namespaces=namespaces)
    else:
        # xpath1.0
        matches = self.root.xpath(xpath, namespaces=namespaces)

    if len(matches) != 0:
        # attribute of the first match
        return matches[0].get(attr_name, default=default)
    return default
def check_selector(self, path, root, expected, namespaces=None, **kwargs):
    """
    Checks using the selector API, namely the *select* function at package level.

    :param path: an XPath expression.
    :param root: an Element or an ElementTree instance.
    :param expected: the expected result. Can be a data instance to compare to the result, \
    a type to be used to check the type of the result, a function that accepts the result \
    as argument and returns a boolean value, an exception class that is raised by running \
    the evaluate method.
    :param namespaces: an optional mapping from prefixes to namespace URIs.
    :param kwargs: other optional arguments for the parser class.
    """
    parser_class = self.parser.__class__

    # An exception class means the selection itself has to fail.
    if isinstance(expected, type) and issubclass(expected, Exception):
        self.assertRaises(expected, select, root, path, namespaces,
                          parser_class, **kwargs)
        return

    results = select(root, path, namespaces, parser_class, **kwargs)
    if isinstance(expected, list):
        self.assertListEqual(results, expected)
        return
    if isinstance(expected, set):
        self.assertEqual(set(results), expected)
        return
    if isinstance(expected, float) and math.isnan(expected):
        # NaN never compares equal, so test for it explicitly.
        self.assertTrue(math.isnan(results))
        return
    if not callable(expected):
        self.assertEqual(results, expected)
        return
    if isinstance(expected, type):
        self.assertIsInstance(results, expected)
        return
    self.assertTrue(expected(results))
def get_default_value(self, xsd_element_node, xml_parent_node):
    """Return the default value for the element declared by an xsd:element.

    Two sources are consulted, in this order:

    1. the ``default`` attribute of ``xsd_element_node``;
    2. the ``acrn:defaults`` / ``acrn:unique-among`` attributes of its
       ``xs:annotation`` node: ``acrn:defaults`` is evaluated to a
       collection of candidates, ``acrn:unique-among`` is an XPath (run on
       ``self.xml_etree`` with ``$parent`` bound to ``xml_parent_node``)
       selecting the values already in use. The smallest unused candidate
       is returned.

    :param xsd_element_node: the xsd:element node.
    :param xml_parent_node: node bound to the XPath variable ``parent``.
    :return: the default value, or ``None`` when none can be determined
        (including when evaluating the annotations fails).
    """
    # The attribute @default of the xsd:element node
    v = xsd_element_node.get("default")
    if v is not None:
        return v

    # The acrn:defaults and acrn:unique-among annotations which define a set
    # of default values that shall be unique among a collection of nodes
    annot_node = self.get_node(xsd_element_node, "xs:annotation")
    if annot_node is None:
        return None

    defaults = annot_node.get("{https://projectacrn.org}defaults")
    unique_among = annot_node.get("{https://projectacrn.org}unique-among")
    if defaults is None or unique_among is None:
        return None

    try:
        # SECURITY NOTE(review): eval() executes the annotation text as
        # Python code. This is only acceptable while the schema is trusted
        # input; it is kept because the annotation may be an arbitrary
        # expression rather than a literal - confirm before tightening.
        default_values = set(eval(defaults))
        existing_values = set(
            elementpath.select(self.xml_etree,
                               unique_among,
                               variables={"parent": xml_parent_node}))
        return min(default_values - existing_values)
    except Exception:
        # Best effort: a malformed annotation or an exhausted candidate set
        # means "no default". Unlike a bare ``except``, this lets
        # KeyboardInterrupt and SystemExit propagate.
        return None
def evaluate_cda_file_Etree(self, xpath, cda_file):
    """Evaluate ``xpath`` on a CDA file and return the results as a list.

    The document root is obtained through ``self.get_root_from_xml`` and the
    expression is run with the HL7 v3 namespace as default namespace.
    Results that are plain ``ET.Element`` objects are replaced by a fixed
    placeholder text. On a syntax error, type error or missing file a
    one-element list naming the error is returned; an empty or missing
    result yields ``[]``.
    """
    namespaces = {'': 'urn:hl7-org:v3'}
    try:
        root = self.get_root_from_xml(self, cda_file=cda_file)
        results = elementpath.select(root, xpath, namespaces)
    except elementpath.exceptions.ElementPathSyntaxError:
        print(
            "Log: CDAEvaluator.evaluate_cda_file_Etree 'ElementPathSyntaxError'"
        )
        return ["ElementPathSyntaxError"]
    except elementpath.exceptions.ElementPathTypeError:
        print(
            "Log: CDAEvaluator.evaluate_cda_file_Etree 'ElementPathTypeError'"
        )
        return ["ElementPathTypeError"]
    except FileNotFoundError:
        print(
            "Log: CDAEvaluator.evaluate_cda_file_Etree 'FileNotFoundError'"
        )
        return ["FileNotFoundError"]

    if results is None or len(results) == 0:
        return []

    for position, entry in enumerate(results):
        # info: an element shows up when the xpath is not specific enough,
        # i.e. has no '@' specification at the end
        if type(entry) is ET.Element:
            results[position] = "Ergebnisse gefunden (siehe ELGA Dokument)"
    return results
def test_iter_siblings__issue_44(self):
    """Following siblings of the first node must match lxml's own result."""
    expression = 'node()[1]/following-sibling::node()'
    root = lxml_etree.XML(
        '<root>text 1<!-- comment -->text 2<!-- comment --> text 3</root>')

    result = select(root, expression)

    expected = [root[0], 'text 2', root[1], ' text 3']
    self.assertListEqual(result, expected)
    self.assertListEqual(result, root.xpath(expression))
def set_value(self,
              xpath,
              value,
              namespaces=None,
              auto_create=True,
              debug=False,
              ignore_path_check=False):
    """
    Set the text value of the node(s) addressed by xpath

    @param {string} xpath - XPath search path
        Note: (1) with namespaces given, their keys can be used as prefixes, e.g. 'real_person:actor'
        (2) an xPath not starting with '/' is searched from the root node
    @param {string} value - the value to set
    @param {dict} namespaces=None - namespaces, for example:
        ns = {
            'real_person': 'http://people.example.com',
            'role': 'http://characters.example.com'
        }
    @param {bool} auto_create=True - create the node automatically when it does not exist
    @param {bool} debug=False - print the arguments when an unexpected exception occurs
    @param {bool} ignore_path_check=False - skip the path check (operate without checking)

    @throw {NameError} - raised when the node does not exist
    @throws {AttributeError} - raised when the search path does not meet the rules
        for automatic creation
    """
    try:
        if self.use_xpath2:
            # xpath2.0
            targets = elementpath.select(self.root, xpath, namespaces=namespaces)
        else:
            # xpath1.0
            targets = self.root.xpath(xpath, namespaces=namespaces)

        if len(targets) != 0:
            # set the value on every match
            for target in targets:
                target.text = value
        elif auto_create:
            # nothing found: try to create the node
            created = self.append_path_node(
                xpath,
                namespaces=namespaces,
                ignore_path_check=ignore_path_check)
            created.text = value
        else:
            # creation not wanted: report the missing node
            raise NameError("can't find node by xpath")
    except BaseException:
        if debug:
            # print the arguments, then let the error propagate
            print(
                'set_value error, para:[xpath=%s][value=%s][namespaces=%s][auto_create=%s]'
                % (xpath, value, namespaces, auto_create))
        raise
def to_string(self, xpath=None, namespaces=None, **kwargs):
    """
    Serialize an XML node to a string

    @param {string} xpath=None - XPath search path, None means the root node
    @param {dict} namespaces=None - namespaces used for the search
    @param {**kwargs} kwargs - extended parameters, i.e. the parameters of etree.tostring:
        method="xml"
        xml_declaration=None - whether to add the xml declaration, True - always, False - never,
            None - only when the encoding is not US-ASCII, UTF-8 or Unicode
        pretty_print=True - whether to pretty print the output
        with_tail=True
        standalone=None
        doctype=None
        exclusive=False
        inclusive_ns_prefixes=None
        with_comments=True
        strip_text=False

    @return {string} - the serialized string
    """
    if xpath is None:
        target = self.root
    else:
        if self.use_xpath2:
            # xpath2.0
            matches = elementpath.select(self.root, xpath, namespaces=namespaces)
        else:
            # xpath1.0
            matches = self.root.xpath(xpath, namespaces=namespaces)
        # first match, or None when nothing matched
        target = matches[0] if len(matches) > 0 else None

    # Only these options are forwarded; caller values override the defaults.
    defaults = {
        'method': "xml",
        'xml_declaration': None,
        'pretty_print': True,
        'with_tail': True,
        'standalone': None,
        'doctype': None,
        'exclusive': False,
        'inclusive_ns_prefixes': None,
        'with_comments': True,
        'strip_text': False,
    }
    options = {
        name: kwargs.get(name, fallback)
        for name, fallback in defaults.items()
    }
    serialized = ET.tostring(target, encoding=self.encoding, **options)
    return serialized.decode(encoding=self.encoding)
def elementpath_lxml_path_with_predicate():
    """Run ``path1`` with the XPath 1.0 parser on the lxml metadata tree.

    Exactly one node is expected: the element at ``[5][2][1][9]`` below the
    root, whose ``Algorithm`` attribute equals ``value``.
    """
    matches = select(metadata_lxml_root,
                     path1,
                     namespaces,
                     parser=XPath1Parser)
    assert len(matches) == 1
    match = matches[0]
    assert match is metadata_lxml_root[5][2][1][9]
    assert match.get('Algorithm') == value
def get_date_created(self):
    """Return the document's ``effectiveTime`` as a datetime.

    A 19 character value is parsed as ``%Y%m%d%H%M%S%z``; an 8 character
    value is parsed as a date and localized to UTC. Any other length yields
    ``None``. Raises ``IndexError`` when the attribute is not present.
    """
    value_xpath = """/ClinicalDocument/effectiveTime/@value"""
    values = elementpath.select(self.root, value_xpath, self.namespaces)
    # TODO: handle malformed dates
    raw = values[0]
    if len(raw) == 19:
        return datetime.strptime(raw, '%Y%m%d%H%M%S%z')
    if len(raw) == 8:
        return pytz.utc.localize(datetime.strptime(raw, '%Y%m%d'))
    return None
def get_reference_id_from_result(self, xpath):
    """Map the references found near ``xpath`` to section code identifiers.

    Starting at ``xpath`` (with any trailing ``/concat...`` part removed) the
    expression is extended by ``/parent::*`` step by step until the reached
    node has an ``entry`` parent, or until the extended expression selects
    nothing. When such a node is found, every ``text/reference/@value``
    below it is replaced by ``'id'`` plus the ``code`` of the first section
    containing an element whose ``ID`` equals the reference (without ``#``).

    Returns the list of such identifiers, or ``[]`` when no ``entry`` parent
    is reached or no reference is found.
    """
    namespaces = {'': 'urn:hl7-org:v3'}
    hit = False
    while hit is False:
        #if xpath contains "concat" remove everything after that
        result = xpath.find('/concat')
        if result > 0:
            xpath = xpath[:result]
        # climb one level per iteration
        xpath += '/parent::*'
        # stop climbing once the expression no longer selects anything
        if len(elementpath.select(self.root, xpath, namespaces)) == 0:
            break
        try:
            hit = True if len(
                elementpath.select(self.root, xpath + '/parent::entry',
                                   namespaces)) > 0 else False
        except elementpath.exceptions.ElementPathSyntaxError:
            print("Syntax Error")
        except FileNotFoundError:
            print("File not Found!")
        except elementpath.exceptions.ElementPathTypeError:
            print("Path Error")
            # NOTE(review): only this handler leaves the loop; the others
            # retry one level further up - confirm that this is intended.
            break
        except RecursionError:
            print("RecursionError")
    if hit is True:
        xpath += '//text/reference/@value'
        results = elementpath.select(self.root, xpath, namespaces)
        if results is not None and len(results) != 0:
            for entry in results:
                # position of the first item equal to this reference
                index = results.index(entry)
                reference = str(entry.replace('#', ''))
                _results = elementpath.select(
                    self.root, "//component[section//*/@ID = '" + reference +
                    "']/section/code/@code", namespaces)
                # raises IndexError when no section code matches
                results[index] = 'id' + str(_results[0])
            return results
    return []
def set_attr(self,
             xpath,
             attr_name,
             value,
             namespaces=None,
             auto_create=True,
             ignore_path_check=False):
    """
    Set an attribute on the node(s) addressed by xpath (the attribute is added
    whenever the node exists)

    @param {string} xpath - XPath search path
        Note: (1) with namespaces given, their keys can be used as prefixes, e.g. 'real_person:actor'
        (2) an xPath not starting with '/' is searched from the root node
    @param {string} attr_name - attribute name
    @param {string} value - the value to set
    @param {dict} namespaces=None - namespaces, for example:
        ns = {
            'real_person': 'http://people.example.com',
            'role': 'http://characters.example.com'
        }
    @param {bool} auto_create=True - create the node automatically when it does not exist
    @param {bool} ignore_path_check=False - skip the path check (operate without checking)

    @throw {NameError} - raised when the node does not exist
    @throws {AttributeError} - raised when the search path does not meet the rules
        for automatic creation
    """
    if self.use_xpath2:
        # xpath2.0
        targets = elementpath.select(self.root, xpath, namespaces=namespaces)
    else:
        # xpath1.0
        targets = self.root.xpath(xpath, namespaces=namespaces)

    if len(targets) == 0:
        if not auto_create:
            # creation not wanted: report the missing node
            raise NameError("can't find node by xpath")
        # nothing found: try to create the node and use it as the only target
        created = self.append_path_node(xpath,
                                        namespaces=namespaces,
                                        ignore_path_check=ignore_path_check)
        targets = [created]

    # set the attribute on every target
    for target in targets:
        target.set(attr_name, value)
def check_current(self, parser_class):
    """Check the ``current()`` function with the given parser class.

    For every element of a small document, ``current()`` must return the
    context element itself; inside a predicate it must still refer to the
    context element rather than to the node being filtered.
    """
    xml_doc = etree.parse(
        StringIO("""
        <root>
            <element id="a">value</element>
            <element id="b" ref="a">other value</element>
        </root>
        """))

    for node in xml_doc.iter():
        selected = select_with_context(xml_doc,
                                       node,
                                       "current()",
                                       parser=parser_class)
        self.assertEqual([node], selected)

    node_a = select(xml_doc, "//element")[0]
    # Not used below; the lookup still fails fast when there is no second element.
    node_b = select(xml_doc, "//element")[1]
    ref_node = select(xml_doc, "//element[@ref]")[0]

    selected = select_with_context(xml_doc,
                                   ref_node,
                                   "/root/element[@id=current()/@ref]",
                                   parser=parser_class)
    self.assertEqual([node_a], selected)
def get_context_elements(self, xml_document, rule_context, namespaces,
                         variables):
    """Select the nodes a rule context applies to.

    The context is first evaluated on the document itself. A context that
    does not start with ``'/'`` is additionally evaluated on every element
    returned by ``xml_document.iter()`` and those matches are appended to
    the same result list.
    """
    matches = select(xml_document,
                     rule_context,
                     namespaces=namespaces,
                     variables=variables)
    if not rule_context.startswith('/'):
        relative = Selector(rule_context,
                            namespaces=namespaces,
                            variables=variables)
        for element in xml_document.iter():
            matches.extend(relative.select(element))
    return matches
def get_nodes(self, xpath, namespaces=None):
    """
    Get the list of nodes addressed by xpath

    @param {string} xpath - XPath search path
    @param {dict} namespaces=None - namespaces used for the search

    @return {list} - the list of nodes (the contained objects are ET._Element)
    """
    if not self.use_xpath2:
        # xpath1.0
        return self.root.xpath(xpath, namespaces=namespaces)
    # xpath2.0
    return elementpath.select(self.root, xpath, namespaces=namespaces)
def type_of_element(self, type_tag, xsd_element_node, xml_node):
    """Find the type definition node that applies to an element.

    When ``xml_node`` is given, the first ``xs:alternative`` child of
    ``xsd_element_node`` whose ``test`` expression selects something on
    ``xml_node`` replaces the element declaration as the place to look.
    The type is the ``type_tag`` child found there or, failing that, the
    node returned by ``self.get_node`` for the ``type_tag`` named by the
    ``type`` attribute.

    :return: the type node, or ``None`` when there is neither an inline
        type nor a ``type`` attribute.
    """
    declaration = xsd_element_node
    if xml_node is not None:
        alternatives = xsd_element_node.findall("xs:alternative",
                                                namespaces=self.xpath_ns)
        # First alternative whose test holds; otherwise keep the element.
        declaration = next(
            (alternative for alternative in alternatives
             if elementpath.select(xml_node, alternative.get("test"))),
            xsd_element_node)

    inline_type = declaration.find(type_tag, namespaces=self.xpath_ns)
    if inline_type is not None:
        return inline_type

    type_name = declaration.get("type")
    if not type_name:
        return None
    return self.get_node(self.xsd_etree,
                         f".//{type_tag}[@name='{type_name}']")
def findall(self, path, namespaces=None):
    """
    Finds all matching XSD/XML elements or attributes.

    :param path: is an XPath expression that considers the schema as the root element \
    with global elements as its children.
    :param namespaces: an optional mapping from namespace prefix to full name.
    :return: a list containing all matching XSD/XML elements or attributes. An empty list \
    is returned if there is no match.
    """
    # An absolute path is made relative to the schema by prefixing a dot.
    relative_path = u'.' + path if path.startswith('/') else path
    namespace_map = namespaces or self.xpath_namespaces
    return select(self, relative_path, namespace_map, strict=False)
def format_paths(unified_node, parent_map, report_on, variables):
    """Return a slash separated location path for every selected element.

    ``report_on`` is evaluated on ``unified_node`` with the XPath 2.0
    parser. Each selected element is followed up to the top through
    ``parent_map``; a segment gets a 1-based ``[n]`` index when the parent
    has more than one child with the same tag.
    """
    selected = elementpath.select(unified_node,
                                  report_on,
                                  variables=variables,
                                  parser=elementpath.XPath2Parser)
    paths = []
    for node in selected:
        # Segments are gathered leaf-first and reversed when joined.
        segments = []
        while node is not None:
            segment = node.tag
            parent = parent_map.get(node)
            if parent is not None:
                same_tag = parent.findall(node.tag)
                if len(same_tag) > 1:
                    segment = segment + f"[{same_tag.index(node) + 1}]"
            segments.append(segment)
            node = parent
        paths.append("/" + "/".join(reversed(segments)))
    return paths
def test_basic_example(self):
    """Every assertion of the basic schema must hold for ``basic1_ok.xml``."""
    schema = Schema(get_file("schematron", "basic.sch"))
    doc = etree.parse(get_file("xml", "basic1_ok.xml"))

    variables = {}
    parser = XPath2Parser(schema.ns_prefixes, variables)

    for pattern in schema.patterns.values():
        for rule in pattern.rules:
            # Each element matched by the rule context is checked separately.
            for element in select(doc, rule.context):
                context = XPathContext(root=doc, item=element)
                for assertion in rule.assertions:
                    outcome = parser.parse(assertion.test).evaluate(context)
                    self.assertTrue(outcome, assertion.to_string())
def main() -> None:
    """Print the children of a sample root using the four selection APIs."""
    from xml.etree.ElementTree import XML

    import elementpath

    root = XML('<a><b1/><b2><c1/><c2/></b2><b3/></a>')
    expression = '*'

    # Module level functions.
    print(elementpath.select(root, expression))
    print(list(elementpath.iter_select(root, expression)))

    # Reusable selector object.
    selector = elementpath.Selector(expression)
    print(selector.select(root))
    print(list(selector.iter_select(root)))
def construct_complex_xpath(type: str, is_lineref: bool, single: bool,
                            *args: str, tree: ElementTree.ElementTree,
                            **kwargs) -> List[str]:
    """Build a ``concat(...)`` XPath over several named paths and evaluate it.

    Each name in ``args`` is looked up in ``paths`` as an
    ``(xpath, prefix)`` pair. The xpaths are combined into one ``concat``
    call, separated by the ``separator`` keyword argument (default
    ``' # '``). The resulting extension is passed to
    ``construct_simple_xpath`` and the final expression is evaluated on
    ``tree``.

    :param type: ``'StopEvent'`` or ``'Trip'``.
    :param is_lineref: forwarded to ``construct_simple_xpath``.
    :param single: forwarded to ``construct_simple_xpath``.
    :param args: names of the entries in ``paths`` to combine.
    :param tree: tree the expression is evaluated on.
    :param kwargs: ``separator`` and ``namespaces`` are read here; all
        keyword arguments are forwarded to ``construct_simple_xpath``.
    :return: the selected strings.
    :raises NotImplementedError: when no names are given or ``type`` is
        unknown.
    """
    if not args:
        raise NotImplementedError(
            'Tried to run construct_complex_xpath without xpath names')
    if type == 'StopEvent' or type == 'Trip':
        # trip is True for 'Trip' and False for 'StopEvent'
        trip: bool = True
        if type == 'StopEvent':
            trip = False
        xpaths: List[str] = []
        prefixes: List[str] = []
        for i in args:
            xpaths.append(paths[i][0])
            prefixes.append(paths[i][1])
        extension: str
        # separator as it appears between two concat arguments: , '<sep>',
        separator: str = ", '" + kwargs.get('separator', ' # ') + "', "
        if prefixes.count(prefixes[0]) == len(prefixes):
            # all entries share one prefix: put it in front of the concat
            extension = prefixes[0] + '/concat('
            for i in xpaths:
                extension += i + separator
            # cut the trailing separator and close the call
            extension = extension[:-(len(separator))] + ')'
        else:
            # different prefixes: each argument carries its own prefix
            # (without the first character)
            extension = '/concat('
            for i in range(len(xpaths)):
                extension += prefixes[i][1:] + '/' + xpaths[i] + separator
            extension = extension[:-(len(separator))] + ')'
            # NOTE(review): assumed to belong to this branch only; confirm
            # against the original indentation.
            if type == 'StopEvent':
                extension = extension.replace(timed_leg[1:], this_call[1:])
        xpath = construct_simple_xpath(trip, is_lineref, single, extension,
                                       **kwargs)
        elementtree_kwargs: Dict[str, Any] = {
            'namespaces': kwargs.get('namespaces')
        }
        result: List[str]
        # use the manual concat emulation unless the flag says otherwise
        if elementpath_concat_fixed:
            result = select(tree, xpath, **elementtree_kwargs)
        else:
            result = concat_not_working_workaround(xpath, tree,
                                                   **elementtree_kwargs)
        return result
    else:
        raise NotImplementedError('Unknown type ' + type +
                                  ' at construct_complex_xpath.')
def remove(self, xpath, namespaces=None, hold_tail=False):
    """
    Remove the nodes addressed by xpath

    @param {string} xpath - XPath search path
    @param {dict} namespaces=None - namespaces used for the search
    @param {bool} hold_tail=False - whether to keep the tail information of the previous node
    """
    targets = (
        # xpath2.0
        elementpath.select(self.root, xpath, namespaces=namespaces)
        if self.use_xpath2 else
        # xpath1.0
        self.root.xpath(xpath, namespaces=namespaces))
    for target in targets:
        self.remove_node(target, hold_tail=hold_tail)
def iter_decode(self, source, path=None, validation='lax', process_namespaces=True,
                namespaces=None, use_defaults=True, decimal_type=None, datetime_types=False,
                converter=None, defuse=None, timeout=None, **kwargs):
    """
    Creates an iterator for decoding an XML source to a data structure.

    :param source: the XML data source. Can be a path to a file or an URI of a resource or \
    an opened file-like object or an Element Tree instance or a string containing XML data.
    :param path: is an optional XPath expression that matches the parts of the document \
    that have to be decoded. The XPath expression considers the schema as the root \
    element with global elements as its children.
    :param validation: defines the XSD validation mode to use for decode, can be 'strict', \
    'lax' or 'skip'.
    :param process_namespaces: indicates whether to use namespace information in the decoding \
    process, using the map provided with the argument *namespaces* and the map extracted from \
    the XML document.
    :param namespaces: is an optional mapping from namespace prefix to URI.
    :param use_defaults: indicates whether to use default values for filling missing data.
    :param decimal_type: conversion type for `Decimal` objects (generated by XSD `decimal` \
    built-in and derived types), useful if you want to generate a JSON-compatible data structure.
    :param datetime_types: if set to `True` the datetime and duration XSD types are decoded, \
    otherwise their origin XML string is returned.
    :param converter: an :class:`XMLSchemaConverter` subclass or instance to use for the decoding.
    :param defuse: Overrides when to defuse XML data. Can be 'always', 'remote' or 'never'.
    :param timeout: Overrides the timeout set for the schema.
    :param kwargs: Keyword arguments containing options for converter and decoding.
    :return: Yields a decoded data object, eventually preceded by a sequence of validation \
    or decoding errors.
    """
    if not self.built:
        raise XMLSchemaNotBuiltError(self, "schema %r is not built." % self)
    if not self.elements:
        raise XMLSchemaValueError("decoding needs at least one XSD element declaration!")

    # Wrap raw sources; re-create a resource whose settings are overridden.
    if not isinstance(source, XMLResource):
        defuse = defuse or self.defuse
        timeout = timeout or self.timeout
        source = XMLResource(source=source, defuse=defuse, timeout=timeout, lazy=False)
    elif (defuse and source.defuse != defuse) or (timeout and source.timeout != timeout):
        source = source.copy(defuse=defuse, timeout=timeout, lazy=False)

    if not process_namespaces:
        namespaces = {}
    else:
        namespaces = {} if namespaces is None else namespaces.copy()
        namespaces.update(source.get_namespaces())

    converter = self.get_converter(converter, namespaces, **kwargs)

    # Keyword arguments shared by every element level decode call. None of
    # these names can occur in kwargs: they are parameters of this method.
    decode_options = dict(kwargs)
    decode_options.update(
        source=source, namespaces=namespaces, use_defaults=use_defaults,
        decimal_type=decimal_type, datetime_types=datetime_types)

    if path is None:
        # Decode the whole document, starting at its root element.
        xsd_element = self.find(source.root.tag, namespaces=namespaces)
        if not isinstance(xsd_element, XsdElement):
            reason = "%r is not a global element of the schema!" % source.root.tag
            yield XMLSchemaValidationError(self, source.root, reason, source, namespaces)
            return
        for obj in xsd_element.iter_decode(
                source.root, validation, converter, **decode_options):
            yield obj
        return

    # Decode only the parts of the document matched by the path.
    xsd_element = self.find(path, namespaces=namespaces)
    if not isinstance(xsd_element, XsdElement):
        reason = "the path %r doesn't match any element of the schema!" % path
        obj = elementpath.select(source.root, path, namespaces=namespaces) or source.root
        yield XMLSchemaValidationError(self, obj, reason, source, namespaces)
        return
    for elem in elementpath.select(source.root, path, namespaces=namespaces):
        for obj in xsd_element.iter_decode(
                elem, validation, converter, **decode_options):
            yield obj