def css(self, query): """ Call the ``.css()`` method for each element in this list and return their results flattened as another :class:`SelectorList`. ``query`` is the same argument as the one in :meth:`Selector.css` """ return self.__class__(flatten([x.css(query) for x in self]))
def re(self, regex, replace_entities=True): """ Call the ``.re()`` method for each element in this list and return their results flattened, as a list of unicode strings. By default, character entity references are replaced by their corresponding character (except for ``&`` and ``<``. Passing ``replace_entities`` as ``False`` switches off these replacements. """ return flatten( [x.re(regex, replace_entities=replace_entities) for x in self])
def xpath(self, xpath, namespaces=None, **kwargs): """ Call the ``.xpath()`` method for each element in this list and return their results flattened as another :class:`SelectorList`. ``query`` is the same argument as the one in :meth:`Selector.xpath` ``namespaces`` is an optional ``prefix: namespace-uri`` mapping (dict) for additional prefixes to those registered with ``register_namespace(prefix, uri)``. Contrary to ``register_namespace()``, these prefixes are not saved for future calls. Any additional named arguments can be used to pass values for XPath variables in the XPath expression, e.g.:: selector.xpath('//a[href=$url]', url="http://www.example.com") """ return self.__class__( flatten([ x.xpath(xpath, namespaces=namespaces, **kwargs) for x in self ]))
def get_value(self, value, *processors, **kw): """ Process the given ``value`` by the given ``processors`` and keyword arguments. Available keyword arguments: :param re: a regular expression to use for extracting data from the given value using :func:`~parsel.utils.extract_regex` method, applied before processors :type re: str or typing.Pattern Examples: >>> from itemloaders import ItemLoader >>> from itemloaders.processors import TakeFirst >>> loader = ItemLoader() >>> loader.get_value('name: foo', TakeFirst(), str.upper, re='name: (.+)') 'FOO' """ regex = kw.get('re', None) if regex: value = arg_to_iter(value) value = flatten(extract_regex(regex, x) for x in value) for proc in processors: if value is None: break _proc = proc proc = wrap_loader_context(proc, self.context) try: value = proc(value) except Exception as e: raise ValueError( "Error with processor %s value=%r error='%s: %s'" % (_proc.__class__.__name__, value, type(e).__name__, str(e))) return value
def _get_cssvalues(self, csss, **kw): self._check_selector_method() csss = arg_to_iter(csss) return flatten(self.selector.css(css).getall() for css in csss)
def _get_xpathvalues(self, xpaths, **kw): self._check_selector_method() xpaths = arg_to_iter(xpaths) return flatten(self.selector.xpath(xpath).getall() for xpath in xpaths)