Example #1
0
    def parse(self, response):
        """Scrape article headlines/links from a listing page.

        Yields, in order:
          * one ``Request`` per article, handing the partially-filled item to
            ``self.parse_1`` via ``meta['item']`` for detail extraction;
          * a summary dict ``{"newsInDetails": items}`` for this page;
          * a ``Request`` for the next listing page, when a pagination link
            exists.

        :param response: scrapy Response for the listing page.
        """
        items = []
        for news in response.css('article.entry-item'):
            item = GeneralItem()
            item['news_headline'] = news.css(
                'h6.entry-title a ::text').extract_first()
            item['datetime'] = "not in use"
            news_url = news.css(
                'h6.entry-title a ::attr(href)').extract_first()
            # extract_first() returns None when the selector misses;
            # Request(url=None) raises ValueError, so skip such entries.
            if news_url is None:
                continue
            item['link'] = news_url
            r = Request(url=news_url, callback=self.parse_1)
            r.meta['item'] = item
            yield r
            items.append(item)
        yield {"newsInDetails": items}

        next_page = response.css(
            'div.pagination.clearfix ul.page-numbers.clearfix li a.last.page-numbers ::attr(href)'
        ).extract_first()
        if next_page is not None:
            # extract_first() already yields a str, so the original
            # str()/print() debug steps were redundant and are dropped.
            yield scrapy.Request(next_page, callback=self.parse)
Example #2
0
    def parse(self, response):
        """Scrape GameSpot article headlines, timestamps and links.

        Yields, in order:
          * one ``Request`` per article, passing the partially-filled item to
            ``self.parse_1`` via ``meta['item']``;
          * a summary dict ``{"newsInDetails": items}`` for this page;
          * a ``Request`` for the next listing page when the "next" pagination
            link exists.

        :param response: scrapy Response for the listing page.
        """
        items = []
        for news in response.css('article.media.media-game.media-game'):
            item = GeneralItem()
            item['news_headline'] = news.css(
                'h3.media-title ::text').extract_first()
            item['datetime'] = news.css(
                'time.media-date ::attr(datetime)').extract_first()
            href = news.css('a.js-event-tracking ::attr(href)').extract_first()
            # extract_first() may return None; the original concatenated the
            # site prefix unconditionally, raising TypeError on a miss.
            if href is None:
                continue
            news_url = "https://www.gamespot.com" + href
            item['link'] = news_url
            r = Request(url=news_url, callback=self.parse_1)
            r.meta['item'] = item
            yield r
            items.append(item)
        yield {"newsInDetails": items}

        next_href = response.css(
            'ul.paginate li.paginate__item.skip.next a.btn ::attr(href)').extract_first()
        # Test for None BEFORE prefixing: the original concatenated first, so
        # its `is not None` check could never fire and a missing pagination
        # link crashed with TypeError instead of ending the crawl cleanly.
        if next_href is not None:
            yield scrapy.Request("https://www.gamespot.com" + next_href,
                                 callback=self.parse)
Example #3
0
    def parse(self, response):
        """Scrape recipe titles and links from a yamu.lk listing page.

        Yields, in order:
          * one ``Request`` per recipe, handing the partially-filled item to
            ``self.parse_1`` via ``meta['item']``;
          * a summary dict ``{"newsInDetails": items}`` for this page;
          * ``Request``s for listing pages 1-7 (Scrapy's duplicate filter
            drops the re-yields on subsequent calls).

        :param response: scrapy Response for the listing page.
        """
        items = []
        for news in response.css('a.front-group-item.item'):
            item = GeneralItem()
            headline = news.css('h3.front-h3 ::text').extract_first()
            # extract_first() may return None; the original chained .strip()
            # directly onto it, raising AttributeError on a selector miss.
            item['news_headline'] = headline.strip() if headline is not None else None
            item['datetime'] = "not in use"
            news_url = news.css('::attr(href)').extract_first()
            # Request(url=None) raises ValueError, so skip link-less entries.
            if news_url is None:
                continue
            item['link'] = news_url
            r = Request(url=news_url, callback=self.parse_1)
            r.meta['item'] = item
            yield r
            items.append(item)
        yield {"newsInDetails": items}

        for page in range(1, 8):
            next_page = "https://www.yamu.lk/recipe?page=" + str(page)
            yield scrapy.Request(next_page, callback=self.parse)
Example #4
0
 def parse(self, response):
     """Scrape post headlines, timestamps and links from a listing page.

     Yields, in order:
       * one ``Request`` per post, handing the partially-filled item to
         ``self.parse_1`` via ``meta['item']`` for detail extraction;
       * a summary dict ``{"data": items}`` for this page.

     :param response: scrapy Response for the listing page.
     """
     items = []
     for news in response.css('div.small-12.medium-4.large-4.columns'):
         item = GeneralItem()
         item['news_headline'] = news.css(
             'header.post-title.entry-header h5 ::text').extract_first()
         item['datetime'] = news.css(
             'aside.post-author.cf time ::text').extract_first()
         news_url = news.css(
             'header.post-title.entry-header h5 a ::attr(href)'
         ).extract_first()
         # extract_first() returns None when the selector misses;
         # Request(url=None) raises ValueError, so skip such entries.
         if news_url is None:
             continue
         item['link'] = news_url
         r = Request(url=news_url, callback=self.parse_1)
         r.meta['item'] = item
         yield r
         items.append(item)
     yield {"data": items}