def parse(self, response):
    """Extract quote items from the listing page and follow pagination."""
    # One ItemLoader per quote <div>; the div is the loader's selector, so
    # the relative XPaths below resolve against that quote only.
    for quote_div in response.xpath("//div[@class='quote']"):
        loader = ItemLoader(item=QuoteItem(), selector=quote_div, response=response)
        loader.add_xpath('text', ".//div[@class='quoteText']/text()[1]")
        loader.add_xpath('author', ".//span[@class='authorOrTitle']/text()")
        loader.add_xpath('tags', ".//div[@class='greyText smallText left']/a/text()")
        yield loader.load_item()
    # Follow the "next" button until the last page (missing href ends the crawl).
    next_page = response.xpath("//a[@class='next_page']/@href").extract_first()
    if next_page is not None:
        yield scrapy.Request(url=response.urljoin(next_page), callback=self.parse)
def parse(self, response):
    """Parse a quotes listing page: schedule the next page, yield QuoteItems.

    Fixes over the previous revision:
    - ``respones``/``loder`` typos renamed to ``response``/``loader``
      (Scrapy passes the response positionally to callbacks, so the
      parameter rename is caller-compatible).
    - ``author`` and ``tags`` XPaths now select ``text()`` nodes instead of
      the element nodes themselves, so the loaded item holds plain strings
      rather than raw HTML — consistent with the sibling ``parse``
      implementations in this file.
    """
    # Queue the next listing page first; Scrapy schedules it asynchronously.
    next_page = response.xpath("//a[@class='next_page']/@href").extract_first()
    if next_page is not None:
        next_page_link = response.urljoin(next_page)
        yield scrapy.Request(url=next_page_link, callback=self.parse)
    for quote in response.xpath("//div[@class='quote']"):
        loader = ItemLoader(item=QuoteItem(), selector=quote, response=response)
        loader.add_xpath('text', ".//div[@class='quoteText']/text()[1]")
        loader.add_xpath('author', ".//span[@class='authorOrTitle']/text()")
        loader.add_xpath('tags', ".//div[@class='greyText smallText left']/a/text()")
        yield loader.load_item()
def parse(self, response):
    """Yield a QuoteItem for every quote on the page, then follow pagination.

    Fixes over the previous revision: the ``author`` XPath selected the
    whole ``span`` element (``child::span``) and ``tags`` selected the
    ``<a>`` elements, both without ``text()`` — the loaded item therefore
    contained raw markup. Both now select ``text()`` nodes, matching the
    sibling ``parse`` implementations in this file.
    """
    for quote in response.xpath("//div[@class='quote']"):
        loader = ItemLoader(item=QuoteItem(), selector=quote, response=response)
        loader.add_xpath('text', ".//div[@class='quoteText']/text()[1]")
        loader.add_xpath('author', ".//span[@class='authorOrTitle']/text()")
        loader.add_xpath('tags', ".//div[@class='greyText smallText left']/a/text()")
        yield loader.load_item()
    # Pagination, e.g. /quotes?page=2 — stop when no "next" link remains.
    next_page = response.xpath("//a[@class='next_page']/@href").extract_first()
    if next_page is not None:
        next_page_link = response.urljoin(next_page)
        yield scrapy.Request(url=next_page_link, callback=self.parse)
def parse(self, response):
    """Scrape every quote block on the page, then follow the "next" link.

    Yields populated ``QuoteItem``s followed by a ``scrapy.Request`` for
    the next listing page (e.g. ``/quotes?page=2``) while one exists.
    """
    for quote_block in response.xpath("//div[@class='quote']"):
        item_loader = ItemLoader(item=QuoteItem(), selector=quote_block)
        item_loader.add_xpath('text', ".//div[@class='quoteText']/text()[1]")
        item_loader.add_xpath('author', ".//span[@class='authorOrTitle']/text()")
        # Tag links live under the quote footer; match them by their href.
        item_loader.add_xpath(
            'tags',
            ".//div[@class='quoteFooter']/div/a[contains(@href, '/quotes/tag/')]"
        )
        yield item_loader.load_item()
    next_href = response.xpath("//a[@class='next_page']/@href").extract_first()
    if next_href is not None:
        yield scrapy.Request(url=response.urljoin(next_href), callback=self.parse)
def parse(self, response, **kwargs):
    """Load one QuoteItem per quote div, then request the next page."""
    quote_selectors = response.xpath("//div[@class='quote']")
    for quote_sel in quote_selectors:
        item_loader = ItemLoader(item=QuoteItem(), selector=quote_sel, response=response)
        item_loader.add_xpath("text", ".//div[@class='quoteText']/text()[1]")
        item_loader.add_xpath("author", ".//span[@class='authorOrTitle']/text()")
        item_loader.add_xpath("tags", ".//div[@class='greyText smallText left']/a/text()")
        yield item_loader.load_item()
    # Recurse through pagination until the "next" anchor disappears.
    href = response.xpath("//a[@class='next_page']/@href").extract_first()
    if href is not None:
        yield scrapy.Request(url=response.urljoin(href), callback=self.parse)