Example #1
0
    def find(self, selector):
        """Search beneath the current selection for nodes matching ``selector``.

        The selector is translated to an XPath expression by
        ``selector_to_xpath`` and evaluated against every direct child of
        every element in this selection. All hits are collected, in
        evaluation order, into a new instance of the same class whose
        ``parent`` is this selection.

            >>> m = '<p><span><em>Whoah!</em></span></p><p><em> there</em></p>'
            >>> d = PyQuery(m)
            >>> d('p').find('em')
            [<em>, <em>]
            >>> d('p').eq(1).find('em')
            [<em>]
        """
        xpath = selector_to_xpath(selector)
        elements = []
        for tag in self:
            for child in tag.getchildren():
                # Each xpath() call yields a list; splice it in flat.
                elements.extend(child.xpath(xpath))
        return self.__class__(elements, parent=self)
Example #2
0
 def _filter_only(self, selector, elements, reverse=False, unique=False):
     """Filter ``elements`` themselves, as opposed to also including
        their descendants.

        selector -- selector to keep matching elements by, or None to keep
                    every element. It is translated by
                    ``selector_to_xpath`` with the ``'self::'`` prefix
                    (presumably so each element is tested against itself
                    only; confirm against ``selector_to_xpath``).
        elements -- iterable of nodes to filter; it is never modified.
        reverse  -- when true, the kept nodes are returned in reverse order.
        unique   -- when true, repeated nodes are dropped, keeping the first
                    occurrence (after any reversal).

        Returns a new instance of this object's class built from the
        resulting nodes, with ``parent`` set to this object.
     """
     if selector is None:
         # Copy: reverse() below works in place and must not reorder the
         # sequence the caller handed in.
         results = list(elements)
     else:
         xpath = selector_to_xpath(selector, 'self::')
         results = []
         for tag in elements:
             results.extend(tag.xpath(xpath))
     if reverse:
         results.reverse()
     if unique:
         # Order-preserving de-duplication; membership is tested with
         # ``in`` so unhashable items keep working.
         deduped = []
         for item in results:
             if item not in deduped:
                 deduped.append(item)
         results = deduped
     return self.__class__(results, parent=self)
Example #3
0
 def _filter_only(self, selector, elements, reverse=False, unique=False):
     """Filter ``elements`` themselves, as opposed to also including
        their descendants.

        selector -- selector to keep matching elements by, or None to keep
                    every element. It is translated by
                    ``selector_to_xpath`` with the ``'self::'`` prefix
                    (presumably so each element is tested against itself
                    only; confirm against ``selector_to_xpath``).
        elements -- iterable of nodes to filter; it is never modified.
        reverse  -- when true, the kept nodes are returned in reverse order.
        unique   -- when true, repeated nodes are dropped, keeping the first
                    occurrence (after any reversal).

        Returns a new instance of this object's class built from the
        resulting nodes, with ``parent`` set to this object.
     """
     if selector is None:
         # Copy: reverse() below works in place and must not reorder the
         # sequence the caller handed in.
         results = list(elements)
     else:
         xpath = selector_to_xpath(selector, 'self::')
         results = []
         for tag in elements:
             results.extend(tag.xpath(xpath))
     if reverse:
         results.reverse()
     if unique:
         # Order-preserving de-duplication; membership is tested with
         # ``in`` so unhashable items keep working.
         deduped = []
         for item in results:
             if item not in deduped:
                 deduped.append(item)
         results = deduped
     return self.__class__(results, parent=self)
Example #4
0
            if isinstance(context, basestring):
                try:
                    elements = fromstring(context, self.parser)
                except Exception, e:
                    raise ValueError('%r, %s' % (e, context))
            elif isinstance(context, self.__class__):
                # copy
                elements = context[:]
            elif isinstance(context, list):
                elements = context
            elif isinstance(context, etree._Element):
                elements = [context]

            # select nodes
            if elements and selector is not no_default:
                xpath = selector_to_xpath(selector)
                results = [tag.xpath(xpath) for tag in elements]
                # Flatten the results
                elements = []
                for r in results:
                    elements.extend(r)

        list.__init__(self, elements)

    def __call__(self, *args):
        """return a new PyQuery instance
        """
        length = len(args)
        if length == 0:
            raise ValueError('You must provide at least a selector')
        if args[0] == '':
Example #5
0
            if isinstance(context, basestring):
                try:
                    elements = fromstring(context, parser)
                except Exception, e:
                    raise ValueError('%r, %s' % (e, context))
            elif isinstance(context, self.__class__):
                # copy
                elements = context[:]
            elif isinstance(context, list):
                elements = context
            elif isinstance(context, etree._Element):
                elements = [context]

            # select nodes
            if elements and selector is not no_default:
                xpath = selector_to_xpath(selector)
                results = [tag.xpath(xpath) for tag in elements]
                # Flatten the results
                elements = []
                for r in results:
                    elements.extend(r)

        list.__init__(self, elements)

    def __call__(self, *args):
        """return a new PyQuery instance
        """
        length = len(args)
        if length == 0:
            raise ValueError('You must provide at least a selector')
        if len(args) == 1 and not args[0].startswith('<'):
    def __init__(self, *args, **kwargs):
        """Build the selection from markup, existing nodes, a file or a URL.

        Positional forms:
          (context)            -- use ``context`` as the node set.
          (selector, context)  -- apply ``selector`` to ``context``.
          (url[, data])        -- a string starting with ``http://`` or
                                  ``https://`` is treated as ``url=``.
        ``context`` may be a string (parsed with ``fromstring``), another
        instance of this class (copied), a list (used as is) or a single
        ``etree._Element``.

        Keyword arguments:
          parser     -- stored on ``self.parser`` and passed to
                        ``fromstring``; defaults to ``"html"`` when a URL is
                        fetched with ``urlopen`` and no parser was given.
          parent     -- stored on ``self._parent`` (``no_default`` if absent).
          namespaces -- mapping passed to ``xpath()`` when a selector is
                        applied to a context.
          filename   -- path of a file to open and parse.
          url        -- address to fetch and parse; also recorded as
                        ``self._base_url``.
          opener     -- callable invoked as ``opener(url)`` instead of
                        ``urlopen``.
          method, data -- request options for ``url``; dict/list/tuple data
                        is url-encoded, and with method "get" it is appended
                        to the URL's query string.

        Raises ValueError for unrecognised keyword arguments, for a wrong
        number of positional arguments, or when a string context cannot be
        parsed.
        """
        html = None
        elements = []
        self._base_url = None
        self.parser = kwargs.pop("parser", None)

        if (
            len(args) >= 1
            and (not PY3k and isinstance(args[0], basestring) or PY3k and isinstance(args[0], str))
            and args[0].startswith(("http://", "https://"))
        ):
            # A leading URL is shorthand for url=..., with optional data.
            kwargs["url"] = args[0]
            if len(args) >= 2:
                kwargs["data"] = args[1]
            args = []

        self._parent = kwargs.pop("parent", no_default)
        namespaces = kwargs.pop("namespaces", {})

        if kwargs:
            # specific case to get the dom
            # Only handles opened right here are closed afterwards; whatever
            # a caller-supplied opener returns is left untouched.
            owns_handle = False
            if "filename" in kwargs:
                html = open(kwargs["filename"])
                owns_handle = True
            elif "url" in kwargs:
                url = kwargs.pop("url")
                if "opener" in kwargs:
                    opener = kwargs.pop("opener")
                    html = opener(url)
                else:
                    method = kwargs.get("method")
                    data = kwargs.get("data")
                    if isinstance(data, (dict, list, tuple)):
                        data = urlencode(data)

                    if isinstance(method, basestring) and method.lower() == "get" and data:
                        # GET: move the payload into the query string.
                        if "?" not in url:
                            url += "?"
                        elif url[-1] not in ("?", "&"):
                            url += "&"
                        url += data
                        data = None

                    if data and PY3k:
                        data = data.encode("utf-8")

                    html = urlopen(url, data)
                    owns_handle = True
                    if not self.parser:
                        self.parser = "html"
                self._base_url = url
            else:
                raise ValueError("Invalid keyword arguments %s" % kwargs)
            try:
                # NOTE(review): assumes fromstring() has finished reading
                # the source by the time it returns - confirm.
                elements = fromstring(html, self.parser)
            finally:
                if owns_handle:
                    html.close()
        else:
            # get nodes

            # determine context and selector if any
            selector = context = no_default
            if len(args) == 1:
                context = args[0]
            elif len(args) == 2:
                selector, context = args
            else:
                raise ValueError("You can't do that. Please, provide arguments")

            # get context
            if isinstance(context, basestring):
                try:
                    elements = fromstring(context, self.parser)
                except Exception:
                    raise ValueError(repr(context))
            elif isinstance(context, self.__class__):
                # copy
                elements = context[:]
            elif isinstance(context, list):
                elements = context
            elif isinstance(context, etree._Element):
                elements = [context]

            # select nodes
            if elements and selector is not no_default:
                xpath = selector_to_xpath(selector)
                matched = []
                for tag in elements:
                    # xpath() returns a list per element; flatten as we go.
                    matched.extend(tag.xpath(xpath, namespaces=namespaces))
                elements = matched

        list.__init__(self, elements)