def parse(self, response):
    """Scrape one listing page and schedule article and pagination requests.

    For every ``div.media`` teaser that has a link, a ``DailyMirrorSportsItem``
    holding the headline and the absolute link is attached (via
    ``meta['item']``) to a request for the article page, which is handled by
    ``self.parse_1``.  After the teasers, a summary dict ``{'data': [...]}``
    with this page's items is yielded, followed by requests for the fixed
    listing offsets 30, 60, ..., 390.

    Teasers without an ``href`` are skipped: a request cannot be built from
    a missing URL, and the exception would abort the rest of the page.
    """
    items = []
    for news in response.css('div.media'):
        news_url = news.css(
            'h2.media-heading.cat-header a ::attr(href)').extract_first()
        if not news_url:
            # No link to follow for this teaser; skip it instead of crashing.
            continue
        # Resolve relative hrefs against the page URL so Request gets an
        # absolute URL with a scheme.
        news_url = response.urljoin(news_url)
        headline = news.css(
            'h2.media-heading.cat-header a ::text').extract_first()

        item = DailyMirrorSportsItem()
        item['news_headline'] = headline
        item['link'] = news_url

        # The item travels with the request so parse_1 can pick it up.
        yield Request(url=news_url, callback=self.parse_1,
                      meta={'item': item})
        items.append(item)

    yield {'data': items}

    # Pagination uses fixed offsets rather than following the page's
    # 'a.nextpostslink' href (an earlier variant did the latter).
    # NOTE(review): every parsed page re-yields the same offset requests;
    # this presumably relies on Scrapy's duplicate filter - confirm.
    for i in range(30, 420, 30):
        next_url = "http://www.dailymirror.lk/travel/" + str(i)
        yield scrapy.Request(next_url, callback=self.parse)
def parse(self, response):
    """Scrape one listing page and schedule a request per linked article.

    For every ``div.media`` teaser that has a link, a ``DailyMirrorSportsItem``
    holding the headline and the absolute link is attached (via
    ``meta['item']``) to a request for the article page, which is handled by
    ``self.parse_1``.  After the teasers, a summary dict
    ``{'newsInDetails': [...]}`` with this page's items is yielded.

    Teasers without an ``href`` are skipped: a request cannot be built from
    a missing URL, and the exception would abort the rest of the page.
    """
    items = []
    for news in response.css('div.media'):
        news_url = news.css(
            'h2.media-heading.cat-header a ::attr(href)').extract_first()
        if not news_url:
            # No link to follow for this teaser; skip it instead of crashing.
            continue
        # Resolve relative hrefs against the page URL so Request gets an
        # absolute URL with a scheme.
        news_url = response.urljoin(news_url)
        headline = news.css(
            'h2.media-heading.cat-header a ::text').extract_first()

        item = DailyMirrorSportsItem()
        item['news_headline'] = headline
        item['link'] = news_url

        # The item travels with the request so parse_1 can pick it up.
        yield Request(url=news_url, callback=self.parse_1,
                      meta={'item': item})
        items.append(item)

    yield {'newsInDetails': items}
def parse(self, response):
    """Scrape a category page and follow its "next page" link.

    For every ``#categorycontent`` element that has a link, a
    ``DailyMirrorSportsItem`` holding the first ``h2`` headline text and the
    absolute link is attached (via ``meta['item']``) to a request for the
    article page, which is handled by ``self.parse_1``.  A summary dict
    ``{'data': [...]}`` with this page's items is then yielded, and if the
    page has an ``a.nextpostslink`` href, that page is requested with this
    method as the callback.

    Elements without an ``href`` are skipped: a request cannot be built from
    a missing URL, and the exception would abort the rest of the page.
    """
    items = []
    for news in response.css('#categorycontent'):
        # NOTE(review): extract_first() only picks the first h2/href inside
        # each matched element - confirm one article per element is intended.
        news_url = news.css('h2 ::attr(href)').extract_first()
        if not news_url:
            # No link to follow; skip instead of crashing on Request.
            continue
        # Resolve relative hrefs against the page URL so Request gets an
        # absolute URL with a scheme.
        news_url = urljoin(response.url, news_url)
        headline = news.css('h2 ::text').extract_first()

        item = DailyMirrorSportsItem()
        item['news_headline'] = headline
        item['link'] = news_url

        # The item travels with the request so parse_1 can pick it up.
        yield Request(url=news_url, callback=self.parse_1,
                      meta={'item': item})
        items.append(item)

    yield {'data': items}

    next_link = response.css('a.nextpostslink ::attr(href)').extract_first()
    if next_link is not None:
        next_url = urljoin(response.url, str(next_link))
        print("scrpping "+next_url)
        yield scrapy.Request(next_url, callback=self.parse)