def parse_directory(self, response):
    """Yield one loaded item for each directory entry on the page.

    Entries are the ``<li>`` elements that are direct children of a
    ``<ul class="directory-url">``. For every entry an ``ExampleLoader``
    scoped to that ``<li>`` collects:

    * ``name`` from the ``a::text`` CSS selector,
    * ``description`` from the ``::text`` CSS selector,
    * ``link`` from the ``a::attr(href)`` CSS selector,
    * ``url`` from the URL of the response being parsed.
    """
    entries = response.css('ul.directory-url > li')
    for entry in entries:
        loader = ExampleLoader(selector=entry)
        loader.add_css('name', 'a::text')
        loader.add_css('description', '::text')
        loader.add_css('link', 'a::attr(href)')
        # Every entry records the page it was found on, not its own link.
        loader.add_value('url', response.url)
        item = loader.load_item()
        yield item
def parse(self, response):
    """Yield one loaded item per ``<li>`` under ``ul.directory-url``.

    For each matching list element an ``ExampleLoader`` bound to both the
    response and that element collects:

    * ``name`` from the XPath ``a/text()``,
    * ``url`` from the XPath ``a/@href``,
    * ``description`` from the element's own text nodes (``text()``),
      keeping only what the regular expression captures: the text that
      follows a ``-`` and one whitespace character, up to the next newline.
    """
    # Use the response's own selector shortcut, as parse_directory does,
    # instead of wrapping the response in a separate Selector.
    for site in response.xpath('//ul[@class="directory-url"]/li'):
        il = ExampleLoader(response=response, selector=site)
        il.add_xpath('name', 'a/text()')
        il.add_xpath('url', 'a/@href')
        # Raw string so that ``\s`` and ``\n`` reach the regex engine as
        # written; a non-raw literal makes ``\s`` an invalid string escape.
        il.add_xpath('description', 'text()', re=r'-\s([^\n]*?)\n')
        yield il.load_item()
def parse_page(self, response):
    """Build and return a single item describing the whole page.

    An ``ExampleLoader`` bound to the response collects ``name`` from the
    XPath ``//title[1]/text()`` and ``url`` from the response's URL; the
    item produced by ``load_item()`` is returned.
    """
    page_loader = ExampleLoader(response=response)
    page_loader.add_xpath("name", "//title[1]/text()")
    page_loader.add_value("url", response.url)
    item = page_loader.load_item()
    return item