def test_issue_001(self):
    """Check that an ends-with() predicate selects only elements whose text ends with 'Temp'."""
    path_selector = Selector("//FullPath[ends-with(., 'Temp')]")

    # Neither a differently named element nor an empty FullPath must match.
    for xml_text in ('<A/>', '<FullPath/>'):
        self.assertListEqual(path_selector.select(ElementTree.XML(xml_text)), [])

    # A FullPath element whose text ends with 'Temp' is the only selected node.
    full_path = ElementTree.XML('<FullPath>High Temp</FullPath>')
    self.assertListEqual(path_selector.select(full_path), [full_path])
def test_issue_042(self):
    """Check that sibling axes applied to text() agree with a plain text() selection."""
    all_text = Selector('text()')
    sibling_text = Selector('sup[last()]/preceding-sibling::text()')

    # Text nodes preceding the last <sup> are all the text children of the root.
    document = ElementTree.XML(
        '<root>a<sup>1</sup>b<sup>2</sup>c<sup>3</sup></root>'
    )
    self.assertListEqual(all_text.select(document), sibling_text.select(document))

    # Text nodes following the first <sup> are all the text children of the root.
    sibling_text = Selector('sup[1]/following-sibling::text()')
    document = ElementTree.XML(
        '<root><sup>1</sup>b<sup>2</sup>c<sup>3</sup>d</root>'
    )
    self.assertListEqual(all_text.select(document), sibling_text.select(document))
def test_selector_class(self):
    """Exercise the Selector repr, its default namespaces and both selection methods."""
    root_selector = Selector('/A')
    self.assertEqual(repr(root_selector), "Selector(path='/A', parser=XPath2Parser)")
    self.assertEqual(root_selector.namespaces, XPath2Parser.DEFAULT_NAMESPACES)

    # select() and iter_select() must produce the same text nodes.
    text_selector = Selector('text()')
    expected_text = ['Dickens']
    self.assertListEqual(text_selector.select(self.root), expected_text)
    self.assertListEqual(list(text_selector.iter_select(self.root)), expected_text)

    # A variable reference: select() gives the bare value, iter_select() iterates it.
    variable_selector = Selector('$a', variables={'a': 1})
    self.assertEqual(variable_selector.select(self.root), 1)
    self.assertListEqual(list(variable_selector.iter_select(self.root)), [1])
def iterfind(self, path=None, namespaces=None):
    """
    XML resource tree iterfind selector.

    For a non-lazy resource the selection is done on the loaded root element.
    For a lazy resource the XML data is parsed again incrementally: the root
    element is replaced by the newly parsed one and yielded elements are
    cleared once the consumer resumes the iteration.

    :param path: an XPath expression to select nodes. If not provided only \
    the root node is yielded.
    :param namespaces: optional mapping from namespace prefixes to URIs, \
    passed to the XPath selector.
    """
    if not self._lazy:
        if path is None:
            yield self._root
        else:
            for selected in iter_select(self._root, path, namespaces, strict=False):
                yield selected
        return

    # Choose the stream to parse: the rewound file object, the URL or the loaded text.
    if self._fid is not None:
        self._fid.seek(0)
        stream = self._fid
    elif self._url is not None:
        stream = urlopen(self._url, timeout=self.timeout)
    else:
        self.load()
        stream = StringIO(self._text)

    try:
        # Without a path there is no selector and only the root is yielded.
        selector = None if path is None else Selector(path, namespaces, strict=False)

        depth = 0
        for event, elem in self.iterparse(stream, events=('start', 'end')):
            if event == "start":
                if depth == 0:
                    # A new parse starts: drop the old tree and track the new root.
                    self._root.clear()
                    self._root = elem
                depth += 1
                continue

            depth -= 1
            if selector is None:
                if depth == 0:
                    yield elem
                    elem.clear()
            elif elem in selector.select(self._root):
                yield elem
                elem.clear()
            elif depth == 0:
                elem.clear()
    finally:
        # The file object is owned by the resource: close only what was opened here.
        if self._fid is None:
            stream.close()
def iterfind(self, path=None, namespaces=None):
    """
    XML resource tree iterfind selector.

    For a non-lazy resource the selection is done on the loaded root element.
    For a lazy resource the XML data is parsed again incrementally and only
    the elements found at the depth addressed by the path are tested against
    the selector. Yielded elements are cleared when the iteration resumes.

    :param path: an XPath expression to select nodes. If not provided only \
    the root node is yielded.
    :param namespaces: optional mapping from namespace prefixes to URIs, \
    passed to the XPath selector.
    """
    if not self._lazy:
        if path is None:
            yield self._root
        else:
            for e in iter_select(self._root, path, namespaces, strict=False):
                yield e
        return

    if self.seek(0) == 0:
        resource = self.source
    elif self._url is not None:
        resource = urlopen(self._url, timeout=self.timeout)
    else:
        self.load()
        resource = StringIO(self._text)

    try:
        if path is None:
            # No path: select the root element only (depth 0, no selector needed).
            selector = None
            path_level = 0
            select_all = True
        else:
            selector = Selector(path, namespaces, strict=False,
                                parser=XmlResourceXPathParser)
            path = path.replace(' ', '').replace('./', '')
            if path == '.':
                path_level = 0
            elif path.startswith('/'):
                # Absolute path: the leading slash and the root step do not
                # add depth, e.g. '/root/a' addresses the children of the root.
                path_level = path.count('/') - 1
            else:
                path_level = path.count('/') + 1
            # A path made only of wildcards selects every element at that depth.
            select_all = '*' in path and set(path).issubset({'*', '/'})

        level = 0
        for event, elem in self.iterparse(resource, events=('start', 'end')):
            if event == "start":
                if level == 0:
                    # A new parse starts: drop the old tree and track the new root.
                    self._root.clear()
                    self._root = elem
                level += 1
            else:
                level -= 1
                if level == path_level and \
                        (select_all or elem in selector.select(self._root)):
                    yield elem
                    elem.clear()
                elif level == 0:
                    elem.clear()
    finally:
        # Close only what was opened here, never the source object of the resource.
        if self.source is not resource:
            resource.close()
def get_context_elements(self, xml_document, rule_context, namespaces, variables):
    """
    Collect the nodes of an XML document selected by a rule context expression.

    The expression is always evaluated once on the document itself. When it
    does not start with '/', it is evaluated again using each item produced
    by ``xml_document.iter()`` as context, and those results are appended.

    :param xml_document: the XML document to search.
    :param rule_context: the XPath expression of the rule context.
    :param namespaces: mapping from namespace prefixes to URIs for the expression.
    :param variables: mapping of XPath variable names to values.
    :return: the list of selected nodes.
    """
    matched = select(xml_document, rule_context,
                     namespaces=namespaces, variables=variables)

    if not rule_context.startswith('/'):
        context_selector = Selector(rule_context, namespaces=namespaces,
                                    variables=variables)
        matched.extend(
            node
            for element in xml_document.iter()
            for node in context_selector.select(element)
        )
    return matched
def iterfind(self, path=None, namespaces=None):
    """
    XML resource tree iterfind selector.

    For a non-lazy resource the selection is done on the loaded root element.
    For a lazy resource the XML data is parsed again incrementally and only
    the elements found at the depth addressed by the path are tested against
    the selector. Yielded elements are cleared when the iteration resumes.

    :param path: an XPath expression to select nodes. If not provided the \
    iteration returns only the root node.
    :param namespaces: optional mapping from namespace prefixes to URIs. If the \
    resource is lazy and an empty dictionary is provided, the namespace map is \
    updated during the iteration.
    """
    if not self._lazy:
        if path is None:
            yield self._root
        else:
            for e in iter_select(self._root, path, namespaces, strict=False):
                yield e
        return

    if self.seek(0) == 0:
        resource = self.source
    elif self._url is not None:
        resource = urlopen(self._url, timeout=self.timeout)
    else:
        self.load()
        resource = StringIO(self._text)

    if namespaces or namespaces is None:
        events = ('start', 'end')
        nsmap = None
    else:
        # Track and update namespaces
        events = ('start-ns', 'end-ns', 'start', 'end')
        nsmap = []

    try:
        if path is None:
            # No path: select the root element only (depth 0, no selector needed).
            selector = None
            path_level = 0
            select_all = True
        else:
            selector = Selector(path, namespaces, strict=False,
                                parser=XmlResourceXPathParser)
            path = path.replace(' ', '').replace('./', '')
            if path == '.':
                path_level = 0
            elif path.startswith('/'):
                # Absolute path: the leading slash and the root step do not
                # add depth, e.g. '/root/a' addresses the children of the root.
                path_level = path.count('/') - 1
            else:
                path_level = path.count('/') + 1
            # A path made only of wildcards selects every element at that depth.
            select_all = '*' in path and set(path).issubset({'*', '/'})

        level = 0
        for event, node in self.iterparse(resource, events):
            if event == "start":
                if level == 0:
                    # A new parse starts: drop the old tree and track the new root.
                    self._root.clear()
                    self._root = node
                level += 1
            elif event == 'end':
                level -= 1
                if level == path_level and \
                        (select_all or node in selector.select(self._root)):
                    yield node
                    node.clear()
                elif level == 0:
                    node.clear()
            elif event == 'start-ns':
                # For this event the node is a (prefix, uri) pair.
                nsmap.append(node)
                namespaces[node[0]] = node[1]
            else:
                # 'end-ns': drop the last declared prefix, then restore the
                # mappings of the declarations that are still in scope.
                try:
                    del namespaces[nsmap.pop()[0]]
                except KeyError:
                    pass
                namespaces.update(nsmap)
    finally:
        # Close only what was opened here, never the source object of the resource.
        if self.source is not resource:
            resource.close()
def iter_subtrees(self, path=None, namespaces=None, lazy_mode=1, ancestors=None):
    """
    XML resource subtree iterator, that yields fully loaded elements. If a
    path is provided the elements selected by the XPath expression are yielded.
    If no path is provided only the root element is yielded.
    For lazy resources the argument *lazy_mode* can change the sequence of elements
    yielded. There are five possible modes, that generate different sequences of
    elements:

      1. Only a full root element (the default mode)

      2. Only a root element pruned at *depth_level*

      3. Only the elements at *depth_level* level of the tree

      4. The elements at *depth_level* and then a pruned root

      5. An incomplete root at start, the elements at *depth_level* and a pruned root

    :param path: an optional XPath expression to select element nodes.
    :param namespaces: an optional mapping from namespace prefixes to URIs. \
    Used to provide namespace mapping for the XPath expression. If the resource \
    is lazy the namespace map is updated during the iteration.
    :param lazy_mode: defines how a lazy resource is iterated when a path \
    is not provided.
    :param ancestors: if a list is provided the iterator tracks the list of \
    ancestors of yielded elements of lazy resources.
    :raises XMLSchemaValueError: if *lazy_mode* is not in the range 1-5.
    """
    if not (1 <= lazy_mode <= 5):
        raise XMLSchemaValueError(
            "invalid argument lazy_mode={!r}".format(lazy_mode))

    if not self._lazy:
        if path is None:
            yield self._root
        else:
            yield from iter_select(self._root, path, namespaces, strict=False)
        return

    # Build the selection settings before acquiring the resource, so that an
    # invalid path cannot leave an opened URL or text stream unclosed.
    if path is None:
        subtree_level = int(self._lazy) if lazy_mode > 1 else 0
        select_all = True
        selector = None
        skip_depth_elements = lazy_mode < 3
    else:
        selector = Selector(path, namespaces, strict=False,
                            parser=XmlResourceXPathParser)
        path = path.replace(' ', '').replace('./', '')
        if path == '.':
            subtree_level = 0
        elif path.startswith('/'):
            # Absolute path: the leading slash and the root step add no depth.
            subtree_level = path.count('/') - 1
        else:
            subtree_level = path.count('/') + 1
        # A path made only of wildcards selects every element at that depth.
        select_all = '*' in path and set(path).issubset({'*', '/'})
        skip_depth_elements = False

    if namespaces is None:
        events = 'start', 'end'
    else:
        # Track and update namespaces
        events = 'start-ns', 'end-ns', 'start', 'end'

    if ancestors is None:
        ancestors = []

    nsmap = []
    level = 0
    changed = False

    if self.seek(0) == 0:
        resource = self.source
    elif self._url is not None:
        resource = urlopen(self._url, timeout=self.timeout)
    else:
        self.load()
        resource = StringIO(self._text)

    try:
        for event, node in self.iterparse(resource, events):
            if event == "start":
                if not level:
                    # A new parse starts: drop the old tree and track the new root.
                    self._root.clear()
                    self._root = node
                    if not path and lazy_mode == 5:
                        yield node  # incomplete root, yielded before its content
                    ancestors.append(node)
                elif level < subtree_level:
                    ancestors.append(node)
                level += 1

            elif event == 'end':
                level -= 1
                if not level:
                    # End of the root element.
                    if not path:
                        if lazy_mode != 3:
                            yield node
                    elif subtree_level:
                        pass
                    elif select_all or node in selector.select(self._root):
                        yield node

                elif level != subtree_level:
                    if level < subtree_level:
                        ancestors.pop()
                    # Elements above or below the subtree level are kept as they are.
                    continue
                elif skip_depth_elements:
                    pass
                elif select_all or node in selector.select(self._root):
                    yield node

                del node[:]  # delete children, keep attributes, text and tail.
                if changed:
                    # Some declarations went out of scope: rebuild the map
                    # from the declarations that are still active.
                    namespaces.clear()
                    namespaces.update(nsmap)
                    changed = False

            elif event == 'start-ns':
                # For this event the node is a (prefix, uri) pair.
                nsmap.append(node)
                update_prefix(namespaces, *node)
            else:
                # 'end-ns': the map is refreshed at the next element end.
                nsmap.pop()
                changed = True

    finally:
        # Close only what was opened here, never the source object of the resource.
        if self.source is not resource:
            resource.close()