def parse(self, response):
    """Yield a detail-page request for each matched wrapper ``div``.

    Selects every ``div`` whose ``class`` attribute is exactly
    ``"swiper-wrapper trendNav orangeNav"``.  For each one, a
    ``SamacharItem`` is filled with the title, absolute URL, link title and
    image URL, and a ``scrapy.Request`` for that URL is yielded.  The item
    travels to ``self.parse_detail_page`` via ``meta['item']``.

    Wrappers with a missing or empty ``href`` are skipped.  Previously a
    missing ``href`` raised ``TypeError`` (``str + None``), which aborted
    the generator and dropped every remaining wrapper in the response.

    :param response: the response whose markup is scanned.
    :return: generator of ``scrapy.Request`` objects.
    """
    wrappers = Selector(response).xpath(
        '//div[@class="swiper-wrapper trendNav orangeNav"]')
    for wrapper in wrappers:
        # NOTE(review): extract_first() only returns the first match, so at
        # most one link per wrapper div is followed -- confirm this is the
        # intended granularity.
        href = wrapper.xpath('div/a/@href').extract_first()
        if not href:
            # The old `if item['url']` check ran after the site prefix was
            # prepended, so it could never be false; test the raw href.
            continue

        item = SamacharItem()
        item['title'] = wrapper.xpath('div/a/text()').extract_first()
        item['url'] = 'http://www.bhaskar.com' + href
        item['img_title'] = wrapper.xpath('div/a/@title').extract_first()
        # Prefer the plain `src`; fall back to `data-original` when `src`
        # is absent or empty.
        item['img_urls'] = (
            wrapper.xpath('a/img/@src').extract_first()
            or wrapper.xpath('a/img/@data-original').extract_first())

        yield scrapy.Request(url=item['url'],
                             callback=self.parse_detail_page,
                             meta={'item': item},
                             dont_filter=True)
def parse(self, response):
    """Yield a detail-page request for each image-caption heading.

    Selects every ``h3`` directly under a ``div`` whose ``class`` attribute
    is exactly ``image-caption``.  For each heading, a ``SamacharItem`` is
    filled with the whitespace-normalised link text and the absolute URL,
    and a ``scrapy.Request`` for that URL is yielded.  The item travels to
    ``self.parse_detail_page`` via ``meta['item']``.

    Headings with a missing or empty ``href`` are skipped.  Previously a
    missing ``href`` raised ``TypeError`` (``str + None``), which aborted
    the generator and dropped every remaining heading in the response.

    :param response: the response whose markup is scanned.
    :return: generator of ``scrapy.Request`` objects.
    """
    headings = Selector(response).xpath("//div[@class='image-caption']/h3")
    # Debug dump kept from the original; the parenthesised form prints the
    # same single value on Python 2 and Python 3.
    print(headings)
    for heading in headings:
        href = heading.xpath("a/@href").extract_first()
        if not href:
            # The old `if item['url']` check ran after the site prefix was
            # prepended, so it could never be false; test the raw href.
            continue

        item = SamacharItem()
        item['title'] = heading.xpath(
            "normalize-space(a/text())").extract_first()
        item['url'] = "http://www.amarujala.com" + href

        yield scrapy.Request(url=item['url'],
                             callback=self.parse_detail_page,
                             meta={'item': item},
                             dont_filter=True)
def parse(self, response):
    """Yield a detail-page request for each ``figure-caption`` figcaption.

    Selects every ``figcaption`` whose ``class`` attribute is exactly
    ``figure-caption``.  For each one, a ``SamacharItem`` is filled with the
    first ``a/span`` text as the title and the first ``a/@href`` as the URL,
    used exactly as extracted.  Captions with no URL are skipped; the rest
    yield a ``scrapy.Request`` handled by ``self.parse_detail_page``, with
    the item passed along in ``meta['item']``.

    :param response: the response whose markup is scanned.
    :return: generator of ``scrapy.Request`` objects.
    """
    captions = Selector(response).xpath(
        "//figcaption[@class='figure-caption']")
    # Debug dump of the matched nodes.
    print(captions)
    for caption in captions:
        entry = SamacharItem()
        entry['title'] = caption.xpath("a/span/text()").extract_first()
        entry['url'] = caption.xpath("a/@href").extract_first()
        if not entry['url']:
            continue

        # The item is handed over through `meta`, so no separate assignment
        # on the request is needed afterwards.
        detail_request = scrapy.Request(
            url=entry['url'],
            callback=self.parse_detail_page,
            meta={'item': entry},
            dont_filter=True,
        )
        yield detail_request
def parse(self, response):
    """Yield a detail-page request for each entry of the topic list.

    Selects every ``li`` directly under a ``ul`` whose ``class`` attribute
    is exactly ``topicList``.  For each entry, a ``SamacharItem`` is filled
    with the headline text and the absolute URL, and a ``scrapy.Request``
    for that URL is yielded.  The item travels to
    ``self.parse_detail_page`` via ``meta['item']``.

    Entries with a missing or empty ``href`` are skipped.  Previously a
    missing ``href`` raised ``TypeError`` (``str + None``), which aborted
    the generator and dropped every remaining entry in the response.

    :param response: the response whose markup is scanned.
    :return: generator of ``scrapy.Request`` objects.
    """
    entries = Selector(response).xpath("//ul[@class='topicList']/li")
    # Debug dump kept from the original; the parenthesised form prints the
    # same single value on Python 2 and Python 3.
    print(entries)
    for entry in entries:
        href = entry.xpath("a/@href").extract_first()
        if not href:
            # The old `if item['url']` check ran after the site prefix was
            # prepended, so it could never be false; test the raw href.
            continue

        item = SamacharItem()
        item['title'] = entry.xpath(
            "a/div[@class='protxt fr']/div[@class='h3']/text()"
        ).extract_first()
        item['url'] = 'https://www.jagran.com' + href

        yield scrapy.Request(url=item['url'],
                             callback=self.parse_detail_page,
                             meta={'item': item},
                             dont_filter=True)