def do_parse(self, chart, response):
    """Collect the chart entries found in ``response`` and add them to ``chart``.

    Every node matched by ``self.list_xpath`` is loaded through a
    ``TomahawkItemLoader`` (rank, artist, album) and appended to the
    chart's ``"list"`` field.

    Returns the result of ``self.do_process_item(chart)`` when there is no
    next-page node or when the depth check trips; otherwise returns
    ``None`` so that parsing can continue on further pages.
    """
    page = Selector(response)

    # The rank is the zero-based position of the node among the matches.
    for position, node in enumerate(page.xpath(self.list_xpath)):
        loader = TomahawkItemLoader(selector=node)
        loader.add_value("rank", position)
        loader.add_xpath("artist", './/span[@class="data"]/text()')
        loader.add_xpath("album", './/a/text()')
        chart.add_value("list", loader.load_item())

    # Without a next-page node the chart is complete: process it now.
    if not page.xpath(self.next_page_xpath):
        self.log("No more next page! Processing")
        return self.do_process_item(chart)

    # Only the trailing slice of the next-page href is inspected.
    # NOTE(review): if `extract` returns a string, `[-1:]` is a single
    # character, so int(...) can never exceed 9 and the depth limit below
    # would never trigger -- confirm the intended behaviour.
    page_tail = extract(self.next_page_xpath + "/@href", page)[-1:]
    if not page_tail or int(page_tail) <= 9:
        # More pages remain; keep parsing instead of processing.
        return None

    self.log("Maximum depth! Processing")
    return self.do_process_item(chart)
def get_boxed_name(self, selector):
    """Build a display name from two text fragments under ``selector``.

    The result is ``"<identifier> <title>"``, where the title comes from
    the ``h1`` whose class contains ``fidel-black title-14`` and the
    identifier comes from the ``pull-left active-tab tab-item`` div.
    """
    title_xpath = './/h1[contains(@class, "fidel-black title-14")]//text()'
    tab_xpath = './/div[@class="pull-left active-tab tab-item"]//text()'

    # The title is extracted first, then the tab text.
    title = extract(title_xpath, selector)
    tab_label = extract(tab_xpath, selector)

    return '%s %s' % (tab_label, title)
def do_create_chart(self, chart, response):
    """Fill in the name, type and description fields of ``chart``.

    The name is extracted from ``chart.selector`` using the
    ``how-it-works__title`` span; the type comes from
    ``self.extract_type(chart)``; the description is the fixed string
    ``'Description'``. ``response`` is accepted but not used here.

    Returns the same ``chart`` object that was passed in.
    """
    title = extract('//span[@class="how-it-works__title"]/text()', chart.selector)
    chart.add_value("name", title)

    # The type is resolved only after the name has been added to the chart.
    chart_type = self.extract_type(chart)
    chart.add_value("type", chart_type)

    chart.add_value("description", 'Description')
    return chart