Example #1
0
    def parse(self, response):
        """Locate the feed's nodes in *response* and pass them to ``parse_nodes``.

        The response is first run through ``self.adapt_response``. Nodes are
        then obtained according to ``self.iterator``:

        * ``'iternodes'`` -- ``xmliter(response, self.itertag)``
        * ``'xml'`` -- an ``XmlXPathSelector`` queried with ``//<itertag>``
        * ``'html'`` -- an ``HtmlXPathSelector`` queried with ``//<itertag>``

        Raises ``NotConfigured`` when the instance has no ``parse_node``
        attribute and ``NotSupported`` for any other ``self.iterator`` value.
        Returns whatever ``self.parse_nodes(response, nodes)`` returns.
        """
        has_node_callback = hasattr(self, 'parse_node')
        if not has_node_callback:
            message = 'You must define parse_node method in order to scrape this XML feed'
            raise NotConfigured(message)

        response = self.adapt_response(response)
        mode = self.iterator

        if mode == 'iternodes':
            nodes = xmliter(response, self.itertag)
        else:
            # The selector-based modes differ only in which selector class
            # wraps the response; everything after that is shared.
            if mode == 'xml':
                selector_cls = XmlXPathSelector
            elif mode == 'html':
                selector_cls = HtmlXPathSelector
            else:
                raise NotSupported('Unsupported node iterator')
            selector = selector_cls(response)
            self._register_namespaces(selector)
            nodes = selector.select('//%s' % self.itertag)

        return self.parse_nodes(response, nodes)
Example #2
0
    def parse(self, response):
        """Extract the nodes named by ``self.itertag`` and delegate to ``parse_nodes``.

        ``self.adapt_response`` is applied to the response before anything
        else. ``self.iterator`` then chooses how nodes are found: ``xmliter``
        for ``'iternodes'``, or an XPath ``//<itertag>`` query on an
        ``XmlXPathSelector`` (``'xml'``) or ``HtmlXPathSelector`` (``'html'``)
        after ``self._register_namespaces`` has been called on the selector.

        Raises ``NotConfigured`` if ``parse_node`` is not defined on the
        instance, and ``NotSupported`` if ``self.iterator`` is none of the
        three values above.
        """
        if not hasattr(self, 'parse_node'):
            error_text = 'You must define parse_node method in order to scrape this XML feed'
            raise NotConfigured(error_text)

        response = self.adapt_response(response)
        kind = self.iterator

        # Streaming path: no selector object and no namespace registration here.
        if kind == 'iternodes':
            nodes = xmliter(response, self.itertag)
            return self.parse_nodes(response, nodes)

        if kind == 'xml':
            selector = XmlXPathSelector(response)
        elif kind == 'html':
            selector = HtmlXPathSelector(response)
        else:
            raise NotSupported('Unsupported node iterator')

        self._register_namespaces(selector)
        xpath = '//%s' % self.itertag
        nodes = selector.select(xpath)
        return self.parse_nodes(response, nodes)
Example #3
0
 def _iternodes(self, response):
     """Yield every ``self.itertag`` node that ``xmliter`` finds in *response*.

     ``self._register_namespaces`` is called on each node before it is
     yielded.
     """
     matches = xmliter(response, self.itertag)
     for current in matches:
         self._register_namespaces(current)
         yield current
Example #4
0
File: feed.py Project: 1012/scrapy
 def _iternodes(self, response):
     """Generate the nodes tagged ``self.itertag`` in *response* via ``xmliter``.

     Each node is passed to ``self._register_namespaces`` before being
     handed to the caller.
     """
     tagged_nodes = xmliter(response, self.itertag)
     for entry in tagged_nodes:
         self._register_namespaces(entry)
         yield entry
 def parse_cities(self, response):
     """Yield everything ``parse_city`` produces for each "city" node.

     Nodes come from ``xmliter(response, "city")``. The return value of
     ``self.parse_city(response, node)`` is passed through
     ``iterate_spider_output`` and its items are yielded one at a time.
     """
     for city_node in xmliter(response, "city"):
         city_results = self.parse_city(response, city_node)
         for item in iterate_spider_output(city_results):
             yield item
Example #6
0
 def _iternodes(self, response):
     """Parse *response* directly with ``xmliter``, filtering on ``self.itertag``.

     ``self._register_namespaces`` is applied to every item before it is
     yielded. Per the original author's note, the yielded items are
     selector objects even though they are called "nodes" here.
     """
     found = xmliter(response, self.itertag)
     for selector in found:
         self._register_namespaces(selector)
         yield selector
 def parse_cities(self, response):
     """Iterate the "city" nodes of *response* and yield their parsed results.

     For each node returned by ``xmliter(response, "city")`` this calls
     ``self.parse_city(response, node)``, wraps the result with
     ``iterate_spider_output`` and yields each resulting item in order.
     """
     for city in xmliter(response, "city"):
         parsed = iterate_spider_output(self.parse_city(response, city))
         for produced in parsed:
             yield produced