def parse(self, response):
    """Entry point for downloaded responses: split the feed into nodes.

    Requires the subclass to define ``parse_node``; adapts the response,
    then builds the node sequence according to ``self.iterator``:

    - ``'iternodes'``: stream nodes with ``xmliter`` (no selector built);
    - ``'xml'`` / ``'html'``: build the matching XPath selector, register
      the spider's namespaces, and select every ``itertag`` node.

    :param response: the downloaded response to scrape.
    :returns: whatever ``self.parse_nodes`` yields for the extracted nodes.
    :raises NotConfigured: if the subclass did not define ``parse_node``.
    :raises NotSupported: if ``self.iterator`` is not a known iterator name.
    """
    if not hasattr(self, 'parse_node'):
        raise NotConfigured('You must define parse_node method in order to scrape this XML feed')
    response = self.adapt_response(response)
    if self.iterator == 'iternodes':
        nodes = xmliter(response, self.itertag)
    elif self.iterator in ('xml', 'html'):
        # The 'xml' and 'html' paths were duplicated; they differ only in
        # which selector class wraps the response.
        selector_cls = XmlXPathSelector if self.iterator == 'xml' else HtmlXPathSelector
        selector = selector_cls(response)
        self._register_namespaces(selector)
        nodes = selector.select('//%s' % self.itertag)
    else:
        raise NotSupported('Unsupported node iterator')
    return self.parse_nodes(response, nodes)
def parse(self, response):
    """Scrape a feed response by iterating over its ``itertag`` nodes.

    The subclass must provide a ``parse_node`` method. Depending on
    ``self.iterator``, nodes come either from ``xmliter`` directly
    ('iternodes') or from an XPath selector built over the whole
    response ('xml' or 'html'). Any other iterator name is rejected.

    :raises NotConfigured: when ``parse_node`` is missing.
    :raises NotSupported: for an unrecognized ``self.iterator`` value.
    """
    if not hasattr(self, 'parse_node'):
        raise NotConfigured(
            'You must define parse_node method in order to scrape this XML feed'
        )
    response = self.adapt_response(response)
    # Map iterator name -> selector class for the selector-based modes.
    selector_classes = {'xml': XmlXPathSelector, 'html': HtmlXPathSelector}
    if self.iterator == 'iternodes':
        nodes = xmliter(response, self.itertag)
    elif self.iterator in selector_classes:
        sel = selector_classes[self.iterator](response)
        self._register_namespaces(sel)
        nodes = sel.select('//%s' % self.itertag)
    else:
        raise NotSupported('Unsupported node iterator')
    return self.parse_nodes(response, nodes)
def _iternodes(self, response):
    """Yield each ``itertag`` node from *response* via ``xmliter``,
    registering the spider's namespaces on every node first."""
    for selector in xmliter(response, self.itertag):
        self._register_namespaces(selector)
        yield selector
def parse_cities(self, response):
    """Iterate over every ``<city>`` node in *response* and yield the
    results of ``parse_city`` for each one, flattened through
    ``iterate_spider_output``."""
    for city_selector in xmliter(response, "city"):
        yield from iterate_spider_output(self.parse_city(response, city_selector))
def _iternodes(self, response):
    """Yield namespace-registered nodes for every ``itertag`` in *response*."""
    # Use xmliter directly to parse the response, filtering by tag.
    for node in xmliter(response, self.itertag):
        # Although named "node", each yielded object is also a selector.
        self._register_namespaces(node)
        yield node