Ejemplo n.º 1
0
    def _parse(self):
        """
        Parse the selector element: read its 'xpath' attribute, build the
        XPath selector and, for XSD versions above 1.0, set the XPath default
        namespace.

        A missing 'xpath' attribute or an expression rejected by the parser is
        reported through ``parse_error`` and the path/selector falls back to '*'.
        """
        super(XsdSelector, self)._parse()

        def build_selector(expression):
            # Every selector of this component uses the same namespaces and parser.
            return Selector(expression,
                            self.namespaces,
                            parser=XsdIdentityXPathParser)

        try:
            self.path = self.elem.attrib['xpath']
        except KeyError:
            self.parse_error("'xpath' attribute required:", self.elem)
            self.path = "*"
        else:
            # The pattern is matched against the expression stripped of spaces.
            compact_path = self.path.replace(' ', '')
            if not self.pattern.match(compact_path):
                self.parse_error("Wrong XPath expression for an xs:selector")

        try:
            self.xpath_selector = build_selector(self.path)
        except ElementPathError as err:
            self.parse_error(err)
            self.xpath_selector = build_selector('*')

        # The xpathDefaultNamespace attribute is considered only above XSD 1.0.
        if self.schema.XSD_VERSION <= '1.0':
            return

        if 'xpathDefaultNamespace' in self.elem.attrib:
            default_namespace = self._parse_xpath_default_namespace(self.elem)
        else:
            default_namespace = self.schema.xpath_default_namespace
        self.xpath_default_namespace = default_namespace
Ejemplo n.º 2
0
 def test_issue_001(self):
     """Check a predicate using ends-with() against three small documents."""
     suffix_selector = Selector("//FullPath[ends-with(., 'Temp')]")

     # Neither a differently named element nor an empty FullPath is selected.
     for xml_source in ('<A/>', '<FullPath/>'):
         document = ElementTree.XML(xml_source)
         self.assertListEqual(suffix_selector.select(document), [])

     # A FullPath element whose text ends with 'Temp' is the only result.
     matching = ElementTree.XML('<FullPath>High Temp</FullPath>')
     self.assertListEqual(suffix_selector.select(matching), [matching])
Ejemplo n.º 3
0
    def _parse(self):
        """
        Parse the selector element: read its 'xpath' attribute, build the
        XPath selector and, for XSD versions above 1.0, resolve the XPath
        default namespace.

        Problems are reported through ``parse_error``; the path and the
        selector fall back to '*' and the default namespace falls back to
        the namespace mapped to the empty prefix.
        """
        super(XsdSelector, self)._parse()

        def build_selector(expression):
            # Every selector of this component uses the same namespaces and parser.
            return Selector(expression,
                            self.namespaces,
                            parser=XsdConstraintXPathParser)

        try:
            self.path = self.elem.attrib['xpath']
        except KeyError:
            self.parse_error("'xpath' attribute required:", self.elem)
            self.path = "*"

        try:
            self.xpath_selector = build_selector(self.path)
        except ElementPathSyntaxError as err:
            self.parse_error(err)
            self.xpath_selector = build_selector('*')

        # The xpathDefaultNamespace attribute is considered only above XSD 1.0.
        if self.schema.XSD_VERSION <= '1.0':
            return

        try:
            self._xpath_default_namespace = get_xpath_default_namespace(
                self.elem, self.namespaces[''], self.target_namespace)
        except XMLSchemaValueError as error:
            self.parse_error(str(error))
            self._xpath_default_namespace = self.namespaces['']
Ejemplo n.º 4
0
    def iterfind(self, path=None, namespaces=None):
        """
        XML resource tree iterfind selector.

        For a non-lazy resource the already loaded root is used: the root is
        yielded when *path* is None, otherwise the items selected by *path*.
        For a lazy resource the source is parsed again incrementally and each
        yielded element is cleared once the consumer resumes the generator.

        :param path: an optional XPath expression. If not provided only the \
        root element is yielded.
        :param namespaces: optional namespaces argument forwarded to the \
        XPath selection.
        """
        if not self._lazy:
            # Non-lazy resource: select on the tree that is already in memory.
            if path is None:
                yield self._root
            else:
                for e in iter_select(self._root,
                                     path,
                                     namespaces,
                                     strict=False):
                    yield e
            return
        elif self._fid is not None:
            # Lazy resource with a file-like object: rewind and reuse it.
            self._fid.seek(0)
            resource = self._fid
        elif self._url is not None:
            resource = urlopen(self._url, timeout=self.timeout)
        else:
            # No file object and no URL: parse from the loaded text.
            self.load()
            resource = StringIO(self._text)

        try:
            if path is None:
                # `level` counts the currently open elements (0 = outside the root).
                level = 0
                for event, elem in self.iterparse(resource,
                                                  events=('start', 'end')):
                    if event == "start":
                        if level == 0:
                            # A new root starts: drop the old one and rebind.
                            self._root.clear()
                            self._root = elem
                        level += 1
                    else:
                        level -= 1
                        if level == 0:
                            # End of the root: yield it, then clear it after
                            # the consumer has resumed the generator.
                            yield elem
                            elem.clear()
            else:
                selector = Selector(path, namespaces, strict=False)
                level = 0
                for event, elem in self.iterparse(resource,
                                                  events=('start', 'end')):
                    if event == "start":
                        if level == 0:
                            self._root.clear()
                            self._root = elem
                        level += 1
                    else:
                        level -= 1
                        # The selection is re-evaluated on the partially built
                        # tree at every 'end' event.
                        if elem in selector.select(self._root):
                            yield elem
                            elem.clear()
                        elif level == 0:
                            elem.clear()
        finally:
            # Close only resources opened by this call (URL stream or StringIO);
            # the file object held in self._fid is left open.
            if self._fid is None:
                resource.close()
Ejemplo n.º 5
0
    def iterfind(self, path=None, namespaces=None):
        """
        XML resource tree iterfind selector.

        For a non-lazy resource the already loaded root is used: the root is
        yielded when *path* is None, otherwise the items selected by *path*.
        For a lazy resource the source is parsed again incrementally and each
        yielded element is cleared once the consumer resumes the generator.

        :param path: an optional XPath expression. If not provided only the \
        root element is yielded.
        :param namespaces: optional namespaces argument forwarded to the \
        XPath selection.
        """
        if not self._lazy:
            # Non-lazy resource: select on the tree that is already in memory.
            if path is None:
                yield self._root
            else:
                for e in iter_select(self._root, path, namespaces, strict=False):
                    yield e
            return
        elif self.seek(0) == 0:
            # The source could be rewound to position 0: reuse it.
            resource = self.source
        elif self._url is not None:
            resource = urlopen(self._url, timeout=self.timeout)
        else:
            # No seekable source and no URL: parse from the loaded text.
            self.load()
            resource = StringIO(self._text)

        try:
            if path is None:
                # `level` counts the currently open elements (0 = outside the root).
                level = 0
                for event, elem in self.iterparse(resource, events=('start', 'end')):
                    if event == "start":
                        if level == 0:
                            # A new root starts: drop the old one and rebind.
                            self._root.clear()
                            self._root = elem
                        level += 1
                    else:
                        level -= 1
                        if level == 0:
                            # End of the root: yield it, then clear it after
                            # the consumer has resumed the generator.
                            yield elem
                            elem.clear()
            else:
                selector = Selector(path, namespaces, strict=False, parser=XmlResourceXPathParser)
                # Normalize the path text to derive the depth of the selected
                # elements: number of '/' separators plus one, or 0 for '.'.
                path = path.replace(' ', '').replace('./', '')
                path_level = path.count('/') + 1 if path != '.' else 0
                # True when the path is made only of '*' and '/' characters;
                # in that case the selector is not evaluated at all.
                select_all = '*' in path and set(path).issubset({'*', '/'})

                level = 0
                for event, elem in self.iterparse(resource, events=('start', 'end')):
                    if event == "start":
                        if level == 0:
                            self._root.clear()
                            self._root = elem
                        level += 1
                    else:
                        level -= 1
                        # Only elements closed at the computed depth are
                        # candidates for selection.
                        if level == path_level and \
                                (select_all or elem in selector.select(self._root)):
                            yield elem
                            elem.clear()
                        elif level == 0:
                            elem.clear()
        finally:
            # Close only resources opened by this call (URL stream or StringIO).
            if self.source is not resource:
                resource.close()
Ejemplo n.º 6
0
    def test_issue_042(self):
        """
        Check that selecting text nodes through a sibling axis gives the
        same result as selecting the text nodes of the context with 'text()'.
        """
        text_selector = Selector('text()')

        cases = (
            ('sup[last()]/preceding-sibling::text()',
             '<root>a<sup>1</sup>b<sup>2</sup>c<sup>3</sup></root>'),
            ('sup[1]/following-sibling::text()',
             '<root><sup>1</sup>b<sup>2</sup>c<sup>3</sup>d</root>'),
        )
        for sibling_path, xml_source in cases:
            sibling_selector = Selector(sibling_path)
            document = ElementTree.XML(xml_source)
            self.assertListEqual(text_selector.select(document),
                                 sibling_selector.select(document))
Ejemplo n.º 7
0
    def test_selector_class(self):
        """Check repr, default namespaces and select/iter_select results of Selector."""
        document = self.root

        # Representation and default namespaces of a selector built with defaults.
        root_selector = Selector('/A')
        expected_repr = "Selector(path='/A', parser=XPath2Parser)"
        self.assertEqual(repr(root_selector), expected_repr)
        self.assertEqual(root_selector.namespaces, XPath2Parser.DEFAULT_NAMESPACES)

        # A text() selection gives the same items from both selection methods.
        text_selector = Selector('text()')
        self.assertListEqual(text_selector.select(document), ['Dickens'])
        iterated_texts = list(text_selector.iter_select(document))
        self.assertListEqual(iterated_texts, ['Dickens'])

        # A variable reference: select() gives the bare value, while
        # iter_select() yields it as a single item.
        variable_selector = Selector('$a', variables={'a': 1})
        self.assertEqual(variable_selector.select(document), 1)
        iterated_values = list(variable_selector.iter_select(document))
        self.assertListEqual(iterated_values, [1])
Ejemplo n.º 8
0
 def get_context_elements(self, xml_document, rule_context, namespaces,
                          variables):
     """
     Collect the items selected by a rule context expression.

     The expression is first evaluated on the document itself. When it does
     not start with '/', it is also evaluated on every element produced by
     ``xml_document.iter()`` and those results are appended.

     :param xml_document: the document to search; must provide ``iter()``.
     :param rule_context: the XPath expression of the rule context.
     :param namespaces: namespaces argument passed to the XPath selection.
     :param variables: variables argument passed to the XPath selection.
     :return: the result of the selection, extended for relative contexts.
     """
     matches = select(xml_document,
                      rule_context,
                      namespaces=namespaces,
                      variables=variables)
     if rule_context.startswith('/'):
         return matches

     # Contexts not starting with '/': evaluate the expression with each
     # element of the document as the starting point.
     context_selector = Selector(rule_context,
                                 namespaces=namespaces,
                                 variables=variables)
     for node in xml_document.iter():
         matches.extend(context_selector.select(node))
     return matches
Ejemplo n.º 9
0
 def test_xpath_issues(self):
     """
     Check the token tree built for a union of two prefixed child paths.

     The parsed tree is compared with the expected representation using
     assertEqual. With assertTrue the second argument is only the failure
     message, so the expected tree would never be compared.
     """
     namespaces = {'ps': "http://schemas.microsoft.com/powershell/2004/04"}
     selector = Selector("./ps:Props/*|./ps:MS/*",
                         namespaces=namespaces,
                         parser=XPath1Parser)
     self.assertEqual(
         selector.root_token.tree,
         '(| (/ (/ (.) (: (ps) (Props))) (*)) (/ (/ (.) (: (ps) (MS))) (*)))'
     )
Ejemplo n.º 10
0
    def _parse(self):
        """
        Parse the selector element: read its 'xpath' attribute, build the
        XPath selector and, for XSD versions above 1.0, parse the
        xpathDefaultNamespace attribute.

        A missing 'xpath' attribute or an expression rejected by the parser is
        reported through ``_parse_error`` and the path/selector falls back to '*'.
        """
        super(XsdSelector, self)._parse()

        def build_selector(expression):
            # Every selector of this component uses the same namespaces and parser.
            return Selector(expression, self.namespaces, parser=XsdConstraintXPathParser)

        try:
            self.path = self.elem.attrib['xpath']
        except KeyError:
            self._parse_error("'xpath' attribute required:", self.elem)
            self.path = "*"

        try:
            self.xpath_selector = build_selector(self.path)
        except XMLSchemaParseError as err:
            message = "invalid XPath expression: %s" % str(err)
            self._parse_error(message, self.elem)
            self.xpath_selector = build_selector('*')

        # The xpathDefaultNamespace attribute is considered only above XSD 1.0.
        if self.schema.XSD_VERSION <= '1.0':
            return

        self.xpath_default_namespace = self._parse_xpath_default_namespace_attribute(
            self.elem, self.namespaces, self.target_namespace
        )
Ejemplo n.º 11
0
 def test_xpath_descendants(self):
     """Check the number of items selected by descendant paths on the xs2 schema."""
     expected_counts = (
         ('.//xs:element', 14),
         ('.//xs:element|.//xs:attribute|.//xs:keyref', 17),
     )
     for descendant_path, expected in expected_counts:
         selector = Selector(descendant_path, self.xs2.namespaces, parser=XPath1Parser)
         selected = list(selector.iter_select(self.xs2.root))
         self.assertEqual(len(selected), expected)
Ejemplo n.º 12
0
 def test_rel_xpath_boolean(self):
     """
     Check boolean() evaluated relative to a non-root context element.

     iter_select() returns an iterator, which is always truthy, so the
     first yielded value is extracted with next() before asserting on it.
     """
     root = ElementTree.XML('<A><B><C/></B></A>')
     el = root[0]  # the <B> element, which has a <C> child and no <D> child
     self.assertTrue(next(Selector('boolean(C)').iter_select(el)))
     self.assertFalse(next(Selector('boolean(D)').iter_select(el)))
Ejemplo n.º 13
0
    def iterfind(self, path=None, namespaces=None):
        """
        XML resource tree iterfind selector.

        For a lazy resource the source is parsed again incrementally and each
        yielded element is cleared once the consumer resumes the generator.

        :param path: an XPath expression to select nodes. If not provided the \
        iteration returns only the root node.
        :param namespaces: optional mapping from namespace prefixes to URIs. If the \
        resource is lazy and an empty dictionary is provided, the namespace map is \
        updated during the iteration.
        """
        if not self._lazy:
            # Non-lazy resource: select on the tree that is already in memory.
            if path is None:
                yield self._root
            else:
                for e in iter_select(self._root,
                                     path,
                                     namespaces,
                                     strict=False):
                    yield e
            return
        elif self.seek(0) == 0:
            # The source could be rewound to position 0: reuse it.
            resource = self.source
        elif self._url is not None:
            resource = urlopen(self._url, timeout=self.timeout)
        else:
            # No seekable source and no URL: parse from the loaded text.
            self.load()
            resource = StringIO(self._text)

        # A non-empty mapping (or None) is used as provided; only an empty
        # mapping is filled in from the namespace events of the parser.
        if namespaces or namespaces is None:
            events = ('start', 'end')
            nsmap = None
        else:
            # Track and update namespaces
            events = ('start-ns', 'end-ns', 'start', 'end')
            nsmap = []

        try:
            if path is None:
                # `level` counts the currently open elements (0 = outside the root).
                level = 0
                for event, node in self.iterparse(resource, events):
                    if event == "start":
                        if level == 0:
                            # A new root starts: drop the old one and rebind.
                            self._root.clear()
                            self._root = node
                        level += 1
                    elif event == 'end':
                        level -= 1
                        if level == 0:
                            # End of the root: yield it, then clear it after
                            # the consumer has resumed the generator.
                            yield node
                            node.clear()
                    elif event == 'start-ns':
                        # For namespace events `node` is indexed as a
                        # (prefix, uri) pair.
                        nsmap.append(node)
                        namespaces[node[0]] = node[1]
                    else:
                        # 'end-ns': remove the prefix declared last, then
                        # re-apply the declarations that are still open.
                        try:
                            del namespaces[nsmap.pop()[0]]
                        except KeyError:
                            pass
                        namespaces.update(nsmap)

            else:
                selector = Selector(path,
                                    namespaces,
                                    strict=False,
                                    parser=XmlResourceXPathParser)
                # Normalize the path text to derive the depth of the selected
                # elements: number of '/' separators plus one, or 0 for '.'.
                path = path.replace(' ', '').replace('./', '')
                path_level = path.count('/') + 1 if path != '.' else 0
                # True when the path is made only of '*' and '/' characters;
                # in that case the selector is not evaluated at all.
                select_all = '*' in path and set(path).issubset({'*', '/'})

                level = 0
                for event, node in self.iterparse(resource, events):
                    if event == "start":
                        if level == 0:
                            self._root.clear()
                            self._root = node
                        level += 1
                    elif event == 'end':
                        level -= 1
                        # Only elements closed at the computed depth are
                        # candidates for selection.
                        if level == path_level and \
                                (select_all or node in selector.select(self._root)):
                            yield node
                            node.clear()
                        elif level == 0:
                            node.clear()
                    elif event == 'start-ns':
                        nsmap.append(node)
                        namespaces[node[0]] = node[1]
                    else:
                        # 'end-ns': remove the prefix declared last, then
                        # re-apply the declarations that are still open.
                        try:
                            del namespaces[nsmap.pop()[0]]
                        except KeyError:
                            pass
                        namespaces.update(nsmap)

        finally:
            # Close only resources opened by this call (URL stream or StringIO).
            if self.source is not resource:
                resource.close()
Ejemplo n.º 14
0
    def iter_subtrees(self,
                      path=None,
                      namespaces=None,
                      lazy_mode=1,
                      ancestors=None):
        """
        XML resource subtree iterator, that yields fully loaded elements. If a
        path is provided the elements selected by the XPath expression are yielded.
        If no path is provided only the root element is yielded. For lazy resources
        the argument *lazy_mode* can change the sequence of elements yielded. There
        are five possible modes, that generate different sequences of elements:\n
          1. Only a full root element (the default mode)\n
          2. Only a root element pruned at *depth_level*\n
          3. Only the elements at *depth_level* level of the tree\n
          4. The elements at *depth_level* and then a pruned root\n
          5. An incomplete root at start, the elements at *depth_level* and a pruned root

        NOTE(review): *depth_level* is not a parameter of this method; when no
        path is given and *lazy_mode* is greater than 1 the level used is
        ``int(self._lazy)`` -- confirm that this is the intended meaning.

        :param path: an optional XPath expression to select element nodes.
        :param namespaces: an optional mapping from namespace prefixes to URIs. \
        Used to provide namespace mapping for the XPath expression. If the resource \
        is lazy the namespace map is updated during the iteration.
        :param lazy_mode: defines how a lazy resource is iterated when a path \
        is not provided.
        :param ancestors: if a list is provided the iterator tracks the list of \
        ancestors of yielded elements of lazy resources.
        :raises XMLSchemaValueError: if *lazy_mode* is not between 1 and 5.
        """
        if not (1 <= lazy_mode <= 5):
            raise XMLSchemaValueError(
                "invalid argument lazy_mode={!r}".format(lazy_mode))

        if not self._lazy:
            # Non-lazy resource: select on the tree that is already in memory.
            if path is None:
                yield self._root
            else:
                yield from iter_select(self._root,
                                       path,
                                       namespaces,
                                       strict=False)
            return

        if self.seek(0) == 0:
            # The source could be rewound to position 0: reuse it.
            resource = self.source
        elif self._url is not None:
            resource = urlopen(self._url, timeout=self.timeout)
        else:
            # No seekable source and no URL: parse from the loaded text.
            self.load()
            resource = StringIO(self._text)

        nsmap = []  # namespace declarations currently open, in document order
        level = 0  # number of currently open elements (0 = outside the root)
        changed = False  # set when a declaration was closed since the last refresh
        if namespaces is None:
            events = 'start', 'end'
        else:
            # Track and update namespaces
            events = 'start-ns', 'end-ns', 'start', 'end'
        if ancestors is None:
            ancestors = []

        if path is None:
            # Without a path the subtree depth comes from the lazy setting,
            # except for mode 1 that works on the whole root.
            subtree_level = int(self._lazy) if lazy_mode > 1 else 0
            select_all = True
            selector = None
            # Modes 1 and 2 do not yield the elements found at subtree depth.
            skip_depth_elements = lazy_mode < 3
        else:
            selector = Selector(path,
                                namespaces,
                                strict=False,
                                parser=XmlResourceXPathParser)
            # Normalize the path text to derive the depth of selected elements.
            path = path.replace(' ', '').replace('./', '')

            if path == '.':
                subtree_level = 0
            elif path.startswith('/'):
                # Absolute path: the leading '/' and the root step are not
                # counted as depth.
                subtree_level = path.count('/') - 1
            else:
                subtree_level = path.count('/') + 1

            # True when the path is made only of '*' and '/' characters;
            # in that case the selector is not evaluated at all.
            select_all = '*' in path and set(path).issubset({'*', '/'})
            skip_depth_elements = False

        try:
            for event, node in self.iterparse(resource, events):
                if event == "start":
                    if not level:
                        # A new root starts: drop the old one and rebind.
                        self._root.clear()
                        self._root = node
                        if not path and lazy_mode == 5:
                            # Mode 5 yields the still incomplete root first.
                            yield node
                        ancestors.append(node)
                    elif level < subtree_level:
                        # Elements above the subtree depth are ancestors of
                        # the elements that may be yielded.
                        ancestors.append(node)
                    level += 1
                elif event == 'end':
                    level -= 1
                    if not level:
                        # End of the root element.
                        if not path:
                            if lazy_mode != 3:
                                yield node
                        elif subtree_level:
                            # The path selects deeper elements, not the root.
                            pass
                        elif select_all or node in selector.select(self._root):
                            yield node
                    elif level != subtree_level:
                        if level < subtree_level:
                            ancestors.pop()
                        # Elements not at subtree depth keep their children:
                        # the cleanup below is skipped.
                        continue
                    elif skip_depth_elements:
                        pass
                    elif select_all or node in selector.select(self._root):
                        yield node

                    del node[:]  # delete children, keep attributes, text and tail.
                    if changed:
                        # Rebuild the caller's map from the declarations that
                        # are still open.
                        namespaces.clear()
                        namespaces.update(nsmap)
                        changed = False

                elif event == 'start-ns':
                    # `node` is unpacked as the arguments that follow the map
                    # in update_prefix(); presumably a (prefix, uri) pair.
                    nsmap.append(node)
                    update_prefix(namespaces, *node)
                else:
                    # 'end-ns': forget the last declaration; the map itself is
                    # refreshed later, at the cleanup of an element.
                    nsmap.pop()
                    changed = True
        finally:
            # Close only resources opened by this call (URL stream or StringIO).
            if self.source is not resource:
                resource.close()