def parse(self, response):
    """Parse a listing page: yield a detail-page Request per article,
    a summary dict of all items, and a Request for the next page.

    Fixes vs. original: skips articles with no href (Request(url=None)
    raises ValueError and aborted the whole callback), drops the debug
    print, and removes the redundant str() on next_page.
    """
    items = []
    for news in response.css('article.entry-item'):
        item = GeneralItem()
        item['news_headline'] = news.css(
            'h6.entry-title a ::text').extract_first()
        item['datetime'] = "not in use"
        news_url = news.css(
            'h6.entry-title a ::attr(href)').extract_first()
        item['link'] = news_url
        # Guard: only follow entries that actually have a link.
        if news_url is not None:
            r = Request(url=news_url, callback=self.parse_1)
            r.meta['item'] = item
            yield r
        items.append(item)
    yield {"newsInDetails": items}
    next_page = response.css(
        'div.pagination.clearfix ul.page-numbers.clearfix li a.last.page-numbers ::attr(href)'
    ).extract_first()
    if next_page is not None:
        yield scrapy.Request(next_page, callback=self.parse)
def parse(self, response):
    """Parse a GameSpot listing page: yield a detail-page Request per
    article, a summary dict of all items, and a next-page Request.

    BUG FIX: the original concatenated the base URL with
    extract_first() BEFORE checking for None, so a missing next-page
    link raised TypeError and the `is not None` check was dead code.
    The same guard is applied to per-article hrefs.
    """
    items = []
    for news in response.css('article.media.media-game.media-game'):
        item = GeneralItem()
        item['news_headline'] = news.css(
            'h3.media-title ::text').extract_first()
        item['datetime'] = news.css(
            'time.media-date ::attr(datetime)').extract_first()
        href = news.css('a.js-event-tracking ::attr(href)').extract_first()
        # Guard: concatenating None with str raises TypeError; skip link-less entries.
        if href is not None:
            news_url = "https://www.gamespot.com" + href
            item['link'] = news_url
            r = Request(url=news_url, callback=self.parse_1)
            r.meta['item'] = item
            yield r
        items.append(item)
    yield {"newsInDetails": items}
    # Check for None BEFORE building the absolute URL (see docstring).
    next_href = response.css(
        'ul.paginate li.paginate__item.skip.next a.btn ::attr(href)'
    ).extract_first()
    if next_href is not None:
        yield scrapy.Request("https://www.gamespot.com" + next_href,
                             callback=self.parse)
def parse(self, response):
    """Parse a Yamu recipe listing page: yield a detail-page Request per
    recipe card, a summary dict of all items, and Requests for the
    hard-coded pagination range (pages 1-7).

    Fixes vs. original: .strip() was called directly on
    extract_first(), which raises AttributeError when the heading is
    missing, and Request(url=None) aborted the callback when a card had
    no href.
    """
    items = []
    for news in response.css('a.front-group-item.item'):
        item = GeneralItem()
        headline = news.css('h3.front-h3 ::text').extract_first()
        # Guard: extract_first() returns None when the selector misses.
        item['news_headline'] = headline.strip() if headline is not None else None
        item['datetime'] = "not in use"
        news_url = news.css('::attr(href)').extract_first()
        item['link'] = news_url
        if news_url is not None:
            r = Request(url=news_url, callback=self.parse_1)
            r.meta['item'] = item
            yield r
        items.append(item)
    yield {"newsInDetails": items}
    # Fixed pagination range, as in the original; Scrapy's dupefilter
    # drops the repeats yielded from every page.
    for i in range(1, 8):
        next_page = "https://www.yamu.lk/recipe?page=" + str(i)
        yield scrapy.Request(next_page, callback=self.parse)
def parse(self, response):
    """Walk each post card in the three-column grid, emitting one
    detail-page Request per post (with the partially-filled item in
    request.meta) and finally a dict wrapping every collected item."""
    collected = []
    for card in response.css('div.small-12.medium-4.large-4.columns'):
        entry = GeneralItem()
        entry['news_headline'] = card.css(
            'header.post-title.entry-header h5 ::text').extract_first()
        entry['datetime'] = card.css(
            'aside.post-author.cf time ::text').extract_first()
        detail_url = card.css(
            'header.post-title.entry-header h5 a ::attr(href)').extract_first()
        entry['link'] = detail_url
        detail_request = Request(url=detail_url, callback=self.parse_1)
        detail_request.meta['item'] = entry
        yield detail_request
        collected.append(entry)
    yield {"data": collected}