Esempio n. 1
0
    def iterfind(self, path=None, namespaces=None):
        """XML resource tree iterfind selector."""
        if not self._lazy:
            if path is None:
                yield self._root
            else:
                for e in iter_select(self._root,
                                     path,
                                     namespaces,
                                     strict=False):
                    yield e
            return
        elif self._fid is not None:
            self._fid.seek(0)
            resource = self._fid
        elif self._url is not None:
            resource = urlopen(self._url, timeout=self.timeout)
        else:
            self.load()
            resource = StringIO(self._text)

        try:
            if path is None:
                level = 0
                for event, elem in self.iterparse(resource,
                                                  events=('start', 'end')):
                    if event == "start":
                        if level == 0:
                            self._root.clear()
                            self._root = elem
                        level += 1
                    else:
                        level -= 1
                        if level == 0:
                            yield elem
                            elem.clear()
            else:
                selector = Selector(path, namespaces, strict=False)
                level = 0
                for event, elem in self.iterparse(resource,
                                                  events=('start', 'end')):
                    if event == "start":
                        if level == 0:
                            self._root.clear()
                            self._root = elem
                        level += 1
                    else:
                        level -= 1
                        if elem in selector.select(self._root):
                            yield elem
                            elem.clear()
                        elif level == 0:
                            elem.clear()
        finally:
            if self._fid is None:
                resource.close()
Esempio n. 2
0
    def iterfind(self, path=None, namespaces=None):
        """XML resource tree iterfind selector."""
        if not self._lazy:
            if path is None:
                yield self._root
            else:
                for e in iter_select(self._root, path, namespaces, strict=False):
                    yield e
            return
        elif self.seek(0) == 0:
            resource = self.source
        elif self._url is not None:
            resource = urlopen(self._url, timeout=self.timeout)
        else:
            self.load()
            resource = StringIO(self._text)

        try:
            if path is None:
                level = 0
                for event, elem in self.iterparse(resource, events=('start', 'end')):
                    if event == "start":
                        if level == 0:
                            self._root.clear()
                            self._root = elem
                        level += 1
                    else:
                        level -= 1
                        if level == 0:
                            yield elem
                            elem.clear()
            else:
                selector = Selector(path, namespaces, strict=False, parser=XmlResourceXPathParser)
                path = path.replace(' ', '').replace('./', '')
                path_level = path.count('/') + 1 if path != '.' else 0
                select_all = '*' in path and set(path).issubset({'*', '/'})

                level = 0
                for event, elem in self.iterparse(resource, events=('start', 'end')):
                    if event == "start":
                        if level == 0:
                            self._root.clear()
                            self._root = elem
                        level += 1
                    else:
                        level -= 1
                        if level == path_level and \
                                (select_all or elem in selector.select(self._root)):
                            yield elem
                            elem.clear()
                        elif level == 0:
                            elem.clear()
        finally:
            if self.source is not resource:
                resource.close()
Esempio n. 3
0
    def iterfind(self, path, namespaces=None):
        """
        Generates all matching XSD/XML element declarations by path.

        :param path: is an XPath expression that considers the schema as the root element \
        with global elements as its children.
        :param namespaces: is an optional mapping from namespace prefix to full name.
        :return: an iterable yielding all matching declarations in the XSD/XML order.
        """
        if path.startswith('/'):
            path = u'.%s' % path  # Avoid document root positioning
        return iter_select(self,
                           path,
                           namespaces or self.xpath_namespaces,
                           strict=False)
Esempio n. 4
0
    def find(self, path, namespaces=None):
        """
        Finds the first XSD/XML element or attribute matching the path.

        :param path: is an XPath expression that considers the schema as the root element \
        with global elements as its children.
        :param namespaces: an optional mapping from namespace prefix to full name.
        :return: The first matching XSD/XML element or attribute or ``None`` if there is not match.
        """
        if path.startswith('/'):
            path = u'.%s' % path
        return next(
            iter_select(self,
                        path,
                        namespaces or self.xpath_namespaces,
                        strict=False), None)
Esempio n. 5
0
def main() -> None:
    """Print the children of a sample root element using four elementpath entry points."""
    from xml.etree.ElementTree import XML
    import elementpath

    sample_root = XML('<a><b1/><b2><c1/><c2/></b2><b3/></a>')

    # Module-level functions: list result first, then the iterator variant.
    print(elementpath.select(sample_root, '*'))
    print(list(elementpath.iter_select(sample_root, '*')))

    # The same selection through a reusable Selector instance.
    children_selector = elementpath.Selector('*')
    print(children_selector.select(sample_root))
    print(list(children_selector.iter_select(sample_root)))
Esempio n. 6
0
                def selector(*args, **kwargs):
                    """
                    Yield the items of ``iter_select(*args, **kwargs)`` and keep the
                    shared ``ancestors`` list aligned with each yielded item.

                    ``self``, ``ancestors`` and ``parent_map`` are not parameters:
                    they are resolved from the surrounding scope.
                    """
                    for item in iter_select(*args, **kwargs):
                        if item is self._root:
                            # The root has no ancestors.
                            ancestors.clear()
                            yield item
                            continue

                        # Climb the parent map until a node without an entry is reached.
                        chain = []
                        current = item
                        while True:
                            try:
                                current = parent_map[current]
                            except KeyError:
                                break
                            if current is not None:
                                chain.append(current)

                        # The list is left untouched when no ancestor has been found.
                        if chain:
                            ancestors[:] = reversed(chain)

                        yield item
Esempio n. 7
0
    def iterfind(self, path=None, namespaces=None):
        """
        XML resource tree iterfind selector.

        :param path: an XPath expression to select nodes. If not provided the \
        iteration returns only the root node.
        :param namespaces: optional mapping from namespace prefixes to URIs. If the \
        resource is lazy and an empty dictionary is provided, the namespace map is \
        updated during the iteration.
        """
        if not self._lazy:
            if path is None:
                yield self._root
            else:
                for e in iter_select(self._root,
                                     path,
                                     namespaces,
                                     strict=False):
                    yield e
            return
        elif self.seek(0) == 0:
            resource = self.source
        elif self._url is not None:
            resource = urlopen(self._url, timeout=self.timeout)
        else:
            self.load()
            resource = StringIO(self._text)

        if namespaces or namespaces is None:
            events = ('start', 'end')
            nsmap = None
        else:
            # Track ad update namespaces
            events = ('start-ns', 'end-ns', 'start', 'end')
            nsmap = []

        try:
            if path is None:
                level = 0
                for event, node in self.iterparse(resource, events):
                    if event == "start":
                        if level == 0:
                            self._root.clear()
                            self._root = node
                        level += 1
                    elif event == 'end':
                        level -= 1
                        if level == 0:
                            yield node
                            node.clear()
                    elif event == 'start-ns':
                        nsmap.append(node)
                        namespaces[node[0]] = node[1]
                    else:
                        try:
                            del namespaces[nsmap.pop()[0]]
                        except KeyError:
                            pass
                        namespaces.update(nsmap)

            else:
                selector = Selector(path,
                                    namespaces,
                                    strict=False,
                                    parser=XmlResourceXPathParser)
                path = path.replace(' ', '').replace('./', '')
                path_level = path.count('/') + 1 if path != '.' else 0
                select_all = '*' in path and set(path).issubset({'*', '/'})

                level = 0
                for event, node in self.iterparse(resource, events):
                    if event == "start":
                        if level == 0:
                            self._root.clear()
                            self._root = node
                        level += 1
                    elif event == 'end':
                        level -= 1
                        if level == path_level and \
                                (select_all or node in selector.select(self._root)):
                            yield node
                            node.clear()
                        elif level == 0:
                            node.clear()
                    elif event == 'start-ns':
                        nsmap.append(node)
                        namespaces[node[0]] = node[1]
                    else:
                        try:
                            del namespaces[nsmap.pop()[0]]
                        except KeyError:
                            pass
                        namespaces.update(nsmap)

        finally:
            if self.source is not resource:
                resource.close()
Esempio n. 8
0
    def iter_subtrees(self,
                      path=None,
                      namespaces=None,
                      lazy_mode=1,
                      ancestors=None):
        """
        XML resource subtree iterator, that yields fully loaded elements. If a
        path is provided the elements selected by the XPath expression are yielded.
        If no path is provided only the root element is yielded. For lazy resources
        the argument *lazy_mode* can change the sequence of elements yielded. The
        depth level used by the modes 2-5 is ``int(self._lazy)``. There are five
        possible modes, that generate different sequences of elements:

          1. Only a full root element (the default mode)
          2. Only a root element pruned at the depth level
          3. Only the elements at the depth level of the tree
          4. The elements at the depth level and then a pruned root
          5. An incomplete root at start, the elements at the depth level and
             a pruned root

        For a lazy resource the XML data is parsed again and ``self._root`` is
        replaced by the root element of the new parse. The children of each
        element processed at the selected level, and of the root, are deleted
        after the element has been handled.

        :param path: an optional XPath expression to select element nodes.
        :param namespaces: an optional mapping from namespace prefixes to URIs.
            Used to provide namespace mapping for the XPath expression. If the
            resource is lazy and a mapping is provided, the mapping is updated
            during the iteration.
        :param lazy_mode: defines how a lazy resource is iterated when a path
            is not provided. Must be an integer between 1 and 5.
        :param ancestors: if a list is provided the iterator tracks the list of
            ancestors of yielded elements of lazy resources.
        :raises XMLSchemaValueError: if *lazy_mode* is not between 1 and 5.
        """
        if not (1 <= lazy_mode <= 5):
            raise XMLSchemaValueError(
                "invalid argument lazy_mode={!r}".format(lazy_mode))

        # Non-lazy resource: the tree is already loaded, select directly on it.
        if not self._lazy:
            if path is None:
                yield self._root
            else:
                yield from iter_select(self._root,
                                       path,
                                       namespaces,
                                       strict=False)
            return

        # Lazy resource: choose the stream to parse the XML data from.
        if self.seek(0) == 0:
            resource = self.source
        elif self._url is not None:
            resource = urlopen(self._url, timeout=self.timeout)
        else:
            self.load()
            resource = StringIO(self._text)

        nsmap = []  # stack of the namespace declarations currently open
        level = 0  # depth of the element being parsed (0 for the root)
        changed = False  # True when a declaration has been closed and not yet applied
        if namespaces is None:
            events = 'start', 'end'
        else:
            # Track and update namespaces
            events = 'start-ns', 'end-ns', 'start', 'end'
        if ancestors is None:
            ancestors = []

        if path is None:
            # Mode 1 works on the root only, the other modes on the lazy depth level.
            subtree_level = int(self._lazy) if lazy_mode > 1 else 0
            select_all = True
            selector = None
            # Modes 1 and 2 never yield the elements found at the depth level.
            skip_depth_elements = lazy_mode < 3
        else:
            selector = Selector(path,
                                namespaces,
                                strict=False,
                                parser=XmlResourceXPathParser)
            path = path.replace(' ', '').replace('./', '')

            # The depth of the selected elements is derived from the number of
            # '/' separators of the normalized path.
            if path == '.':
                subtree_level = 0
            elif path.startswith('/'):
                subtree_level = path.count('/') - 1
            else:
                subtree_level = path.count('/') + 1

            # Paths composed only of '*' steps match every element at that depth,
            # so the selector is not evaluated for them.
            select_all = '*' in path and set(path).issubset({'*', '/'})
            skip_depth_elements = False

        try:
            for event, node in self.iterparse(resource, events):
                if event == "start":
                    if not level:
                        # The root of the new parse replaces the previous one.
                        self._root.clear()
                        self._root = node
                        if not path and lazy_mode == 5:
                            # Mode 5: the root is yielded before its content is parsed.
                            yield node
                        ancestors.append(node)
                    elif level < subtree_level:
                        ancestors.append(node)
                    level += 1
                elif event == 'end':
                    level -= 1
                    if not level:
                        # End of the root element.
                        if not path:
                            if lazy_mode != 3:
                                yield node
                        elif subtree_level:
                            pass
                        elif select_all or node in selector.select(self._root):
                            yield node
                    elif level != subtree_level:
                        # Elements above the selected level leave the ancestors
                        # list, deeper ones are kept as part of their subtree.
                        if level < subtree_level:
                            ancestors.pop()
                        continue
                    elif skip_depth_elements:
                        pass
                    elif select_all or node in selector.select(self._root):
                        yield node

                    del node[:]  # delete children, keep attributes, text and tail.
                    if changed:
                        # Rebuild the mapping from the declarations still open.
                        namespaces.clear()
                        namespaces.update(nsmap)
                        changed = False

                elif event == 'start-ns':
                    nsmap.append(node)
                    # NOTE(review): update_prefix is defined elsewhere; it presumably
                    # registers the (prefix, URI) pair into the mapping -- confirm.
                    update_prefix(namespaces, *node)
                else:
                    # 'end-ns': the mapping is refreshed later, after the next
                    # processed 'end' event.
                    nsmap.pop()
                    changed = True
        finally:
            # Close only a stream opened by this call, never the resource's own source.
            if self.source is not resource:
                resource.close()
Esempio n. 9
0
 def test_iter_select_function(self):
     # A text() step evaluated on the root yields its text node.
     text_nodes = list(iter_select(self.root, 'text()'))
     self.assertListEqual(text_nodes, ['Dickens'])

     # A variable reference yields the value bound by the 'variables' argument.
     variable_values = list(iter_select(self.root, '$a', variables={'a': True}))
     self.assertListEqual(variable_values, [True])