def parse(self, response):
    """Scrape one GSMArena news listing page and queue the next one.

    Yields one ``AlphaItem`` per ``div.news-item`` node, then follows the
    next ``news.php3?iPage=N`` URL while ``alphaspider.page_no`` is below 4.
    """
    for r in response.xpath('//div[@class="news-item"]'):
        title = r.xpath('a/h3/text()').get()
        discription = r.xpath('p/text()').get()
        link = r.xpath('div[1]/a/@href').get()
        img_link = r.xpath('div[1]/a/img/@src').get()

        # Build a fresh item per entry: reusing a single instance makes
        # every yielded item alias the same, later-overwritten object.
        items = AlphaItem()
        items['title'] = title
        items['discription'] = discription
        # .get() yields None when the node is missing; concatenating None
        # would raise TypeError and abort the remainder of the page.
        items['link'] = (
            'https://www.gsmarena.com/' + link if link is not None else None
        )
        items['img_link'] = img_link
        items['tags'] = articleTags(title)
        yield items

    # The URL is built from the counter *before* it is incremented.
    next_page = 'https://www.gsmarena.com/news.php3?iPage=' + str(
        alphaspider.page_no)
    print(next_page)
    if alphaspider.page_no < 4:
        alphaspider.page_no += 1
        yield response.follow(next_page, callback=self.parse)
def parse(self, response):
    """Scrape one News18 world listing page and queue the next one.

    Yields one ``AlphaItem`` (title, link, image link, tags) per
    ``div.blog-list-blog`` node, then follows the next ``world/page-N``
    URL while ``alphaspider.page_no`` is below 14.
    """
    for r in response.xpath('//div[@class="blog-list-blog"]'):
        title = r.xpath('figure/a/img/@title').get()
        link = r.xpath('figure/a/@href').get()
        img_link = r.xpath('figure/a/img/@src').get()

        # Build a fresh item per entry: reusing a single instance makes
        # every yielded item alias the same, later-overwritten object.
        items = AlphaItem()
        items['title'] = title
        items['link'] = link
        items['img_link'] = img_link
        items['tags'] = articleTags(title)
        yield items

    # The URL is built from the counter *before* it is incremented.
    next_page = 'https://www.news18.com/world/page-' + str(
        alphaspider.page_no)
    print(next_page)
    if alphaspider.page_no < 14:
        alphaspider.page_no += 1
        yield response.follow(next_page, callback=self.parse)
def parse(self, response):
    """Scrape one Hindustan Times election-news page and queue the next.

    Yields one ``AlphaItem`` per ``div.media-heading.headingfour`` node,
    then follows the next ``?pageno=N`` URL while ``alphaspider.page_no``
    is below 5.
    """
    headings = response.xpath(
        '//div/div[@class="media-heading headingfour"]')
    for r in headings:
        title = r.xpath('a/text()').get()
        discription = r.xpath('following-sibling::div/text()').get()
        link = r.xpath('a/@href').get()
        # The image sits in the first child div of the heading's grandparent.
        img_link = r.xpath(
            'parent::div/parent::div/div[1]/div/a/img/@src').get()

        # Build a fresh item per entry: reusing a single instance makes
        # every yielded item alias the same, later-overwritten object.
        items = AlphaItem()
        items['title'] = title
        items['discription'] = discription
        items['link'] = link
        items['img_link'] = img_link
        items['tags'] = articleTags(title)
        yield items

    # The URL is built from the counter *before* it is incremented.
    next_page = (
        'https://www.hindustantimes.com/lok-sabha-elections/news/?pageno='
        + str(alphaspider.page_no)
    )
    print(next_page)
    if alphaspider.page_no < 5:
        alphaspider.page_no += 1
        yield response.follow(next_page, callback=self.parse)
def parse(self, response):
    """Scrape one Siasat technology listing page and queue the next one.

    Yields one ``AlphaItem`` per ``article.item-list`` node, then follows
    the next ``category/technology-N`` URL while ``alphaspider.page_no``
    is below 5.
    """
    for r in response.xpath('//article[@class="item-list"]'):
        title = r.xpath('div[2]/h2/a/text()').get()
        discription = r.xpath('div[2]/div/p/text()').get()
        link = r.xpath('div[1]/a/@href').get()
        img_link = r.xpath('div[1]/a/img/@src').get()

        # Build a fresh item per entry: reusing a single instance makes
        # every yielded item alias the same, later-overwritten object.
        items = AlphaItem()
        items['title'] = title
        items['discription'] = discription
        items['link'] = link
        items['img_link'] = img_link
        items['tags'] = articleTags(title)
        yield items

    # The URL is built from the counter *before* it is incremented.
    next_page = 'https://www.siasat.com/category/technology-' + str(
        alphaspider.page_no)
    print(next_page)
    if alphaspider.page_no < 5:
        alphaspider.page_no += 1
        yield response.follow(next_page, callback=self.parse)
def parse(self, response):
    """Scrape one Pinkvilla entertainment page and queue the next one.

    Yields one ``AlphaItem`` (title, link, image link, tags) per teaser
    node, then follows the next ``entertainment/page/N`` URL while
    ``alphaspider.page_no`` is below 5.
    """
    # The leading space inside the class value is intentional: the XPath
    # compares the attribute string exactly.
    teasers = response.xpath('//div[@class=" section-page-teaser-item"]')
    for r in teasers:
        title = r.xpath('div[2]/a/@title').get()
        link = r.xpath('div[1]/a/@href').get()
        img_link = r.xpath('div[1]/a/img/@src').get()

        # Build a fresh item per entry: reusing a single instance makes
        # every yielded item alias the same, later-overwritten object.
        items = AlphaItem()
        items['title'] = title
        items['link'] = link
        items['img_link'] = img_link
        items['tags'] = articleTags(title)
        yield items

    # The URL is built from the counter *before* it is incremented.
    next_page = 'https://www.pinkvilla.com/entertainment/page/' + str(
        alphaspider.page_no)
    print(next_page)
    if alphaspider.page_no < 5:
        alphaspider.page_no += 1
        yield response.follow(next_page, callback=self.parse)
def parse(self, response):
    """Scrape one NDTV world-news listing page and queue the next one.

    Yields one ``AlphaItem`` per ``div.new_storylising_img`` node, then
    follows the next ``world-news/page-N`` URL while
    ``alphaspider.page_no`` is below 5.
    """
    for r in response.xpath('//div[@class="new_storylising_img"]'):
        title = r.xpath('a/@title').get()
        # The summary text is read from a sibling div of the image block.
        discription = r.xpath('following-sibling::div/div[3]/text()').get()
        link = r.xpath('a/@href').get()
        img_link = r.xpath('a/img/@src').get()

        # Build a fresh item per entry: reusing a single instance makes
        # every yielded item alias the same, later-overwritten object.
        items = AlphaItem()
        items['title'] = title
        items['discription'] = discription
        items['link'] = link
        items['img_link'] = img_link
        items['tags'] = articleTags(title)
        yield items

    # The URL is built from the counter *before* it is incremented.
    next_page = 'https://www.ndtv.com/world-news/page-' + str(
        alphaspider.page_no)
    print(next_page)
    if alphaspider.page_no < 5:
        alphaspider.page_no += 1
        yield response.follow(next_page, callback=self.parse)
def parse(self, response):
    """Scrape one Firstpost India listing page and queue the next one.

    Yields one ``AlphaItem`` per ``a.list-item-link`` node, then follows
    the next ``category/india/page/N`` URL while ``alphaspider.page_no``
    is below 5.
    """
    for r in response.xpath('//a[@class="list-item-link"]'):
        title = r.xpath('div[1]/img/@title').get()
        # The anchor itself is the selected node, so href is read directly.
        link = r.xpath('@href').get()
        img_link = r.xpath('div[1]/img/@src').get()
        discription = r.xpath('div[2]/div/text()').get()

        # Build a fresh item per entry: reusing a single instance makes
        # every yielded item alias the same, later-overwritten object.
        items = AlphaItem()
        items['title'] = title
        items['link'] = link
        items['img_link'] = img_link
        items['tags'] = articleTags(title)
        items['discription'] = discription
        yield items

    # The URL is built from the counter *before* it is incremented.
    next_page = 'https://www.firstpost.com/category/india/page/' + str(
        alphaspider.page_no)
    print(next_page)
    if alphaspider.page_no < 5:
        alphaspider.page_no += 1
        yield response.follow(next_page, callback=self.parse)
def parse(self, response):
    """Scrape one IndianWeb2 technology page and queue the next one.

    Yields one ``AlphaItem`` per ``div.list-item`` node, then follows the
    next ``category/technology/page/N/`` URL while
    ``alphaspider.page_no`` is below 5.
    """
    for r in response.xpath('//div[@class="list-item"]'):
        title = r.xpath('article/div[2]/h3/a/text()').get()
        discription = r.xpath('article/div[2]/div[1]/div/text()').get()
        link = r.xpath('article/div[1]/a/@href').get()
        # The image URL is read from the <noscript> copy of the <img>.
        img_link = r.xpath('article/div[1]/a/noscript/img/@src').get()

        # Build a fresh item per entry: reusing a single instance makes
        # every yielded item alias the same, later-overwritten object.
        items = AlphaItem()
        items['title'] = title
        items['discription'] = discription
        items['link'] = link
        items['img_link'] = img_link
        items['tags'] = articleTags(title)
        yield items

    # The URL is built from the counter *before* it is incremented.
    next_page = (
        'https://www.indianweb2.com/category/technology/page/'
        + str(alphaspider.page_no)
        + '/'
    )
    print(next_page)
    if alphaspider.page_no < 5:
        alphaspider.page_no += 1
        yield response.follow(next_page, callback=self.parse)
def parse(self, response):
    """Yield one ``AlphaItem`` per article card on the response page.

    Cards are the ``div`` nodes whose class attribute is exactly
    ``m-article-landing m-block-link``. No pagination is followed.
    """
    cards = response.xpath('//div[@class="m-article-landing m-block-link"]')
    for r in cards:
        title = r.xpath('div[2]/h2/a/@title').get()
        discription = r.xpath('div[2]/h3/text()').get()
        link = r.xpath('div[2]/h2/a/@href').get()
        img_link = r.xpath('div[1]/div/a/picture/img/@src').get()

        # Build a fresh item per entry: reusing a single instance makes
        # every yielded item alias the same, later-overwritten object.
        items = AlphaItem()
        items['title'] = title
        items['discription'] = discription
        items['link'] = link
        items['img_link'] = img_link
        items['tags'] = articleTags(title)
        yield items
def parse(self, response):
    """Yield one ``AlphaItem`` per Economic Times story on the page.

    Stories are the ``div.eachStory`` nodes. The extracted href is
    prefixed with the Economic Times origin. No pagination is followed.
    """
    for r in response.xpath('//div[@class="eachStory"]'):
        title = r.xpath('h3/a/text()').get()
        link = r.xpath('a/@href').get()
        img_link = r.xpath('a/span/img/@src').get()
        discription = r.xpath('p/text()').get()

        # Build a fresh item per entry: reusing a single instance makes
        # every yielded item alias the same, later-overwritten object.
        items = AlphaItem()
        items['title'] = title
        # .get() yields None when the node is missing; concatenating None
        # would raise TypeError and abort the remainder of the page.
        items['link'] = (
            'https://economictimes.indiatimes.com' + link
            if link is not None
            else None
        )
        items['img_link'] = img_link
        items['discription'] = discription
        items['tags'] = articleTags(title)
        yield items
def parse(self, response):
    """Yield one ``AlphaItem`` per compact article box on the page.

    Boxes are the ``div`` nodes whose class attribute is exactly
    ``c-entry-box--compact c-entry-box--compact--article``. The image
    ``src`` is prefixed with the Barca Blaugranes origin. No pagination
    is followed.
    """
    boxes = response.xpath(
        '//div[@class="c-entry-box--compact c-entry-box--compact--article"]'
    )
    for r in boxes:
        title = r.xpath('div/h2/a/text()').get()
        discription = r.xpath('div/p/text()').get()
        link = r.xpath('div/h2/a/@href').get()
        img_link = r.xpath('a[1]/div/img/@src').get()

        # Build a fresh item per entry: reusing a single instance makes
        # every yielded item alias the same, later-overwritten object.
        items = AlphaItem()
        items['title'] = title
        items['discription'] = discription
        items['link'] = link
        # .get() yields None when the node is missing; concatenating None
        # would raise TypeError and abort the remainder of the page.
        # NOTE(review): the prefix assumes the src is site-relative; confirm.
        items['img_link'] = (
            'https://www.barcablaugranes.com/' + img_link
            if img_link is not None
            else None
        )
        items['tags'] = articleTags(title)
        yield items
def parse(self, response):
    """Scrape one Bollywood Hungama listing page and queue the next one.

    Yields one ``AlphaItem`` (title, link, image link, tags) per matching
    ``article`` node, then follows the next ``bollywood/page/N`` URL while
    ``alphaspider.page_no`` is below 5.
    """
    articles = response.xpath(
        '//article[@class="bh-cm-box bh-box-article hentry"]')
    for r in articles:
        title = r.xpath('h3/a/text()').get()
        link = r.xpath('h3/a/@href').get()
        img_link = r.xpath('figure/a/img/@src').get()

        # Build a fresh item per entry: reusing a single instance makes
        # every yielded item alias the same, later-overwritten object.
        items = AlphaItem()
        items['title'] = title
        items['link'] = link
        items['img_link'] = img_link
        items['tags'] = articleTags(title)
        yield items

    # The URL is built from the counter *before* it is incremented.
    next_page = 'https://www.bollywoodhungama.com/bollywood/page/' + str(
        alphaspider.page_no)
    print(next_page)
    if alphaspider.page_no < 5:
        alphaspider.page_no += 1
        yield response.follow(next_page, callback=self.parse)