Esempio n. 1
0
    def parse(self, response):
        """Parse a listing page: follow each article, then queue more pages.

        Every ``div.media`` entry that carries a heading link gets a
        ``DailyMirrorSportsItem`` filled with its headline and link. The item
        is attached to a request for the article (``meta['item']``) whose
        callback is ``self.parse_1``. After the entries, a single
        ``{'data': items}`` dict referencing those same item objects is
        yielded, followed by requests for the hard-coded listing offsets.

        Args:
            response: The listing-page response to parse.

        Yields:
            One ``Request`` per linked article, then the ``{'data': items}``
            dict, then one ``scrapy.Request`` per listing offset.
        """
        items = []
        for news in response.css('div.media'):
            news_url = news.css(
                'h2.media-heading.cat-header a ::attr(href)').extract_first()
            if news_url is None:
                # No heading link in this entry. Building a Request from a
                # None URL raises, which would abort this generator and drop
                # the remaining entries and the pagination below.
                continue

            item = DailyMirrorSportsItem()
            item['news_headline'] = news.css(
                'h2.media-heading.cat-header a ::text').extract_first()
            item['link'] = news_url

            # Resolve against the page URL so relative hrefs are requestable;
            # absolute hrefs are requested as before.
            yield Request(
                url=response.urljoin(news_url),
                callback=self.parse_1,
                meta={'item': item},
            )
            items.append(item)

        # At this point the items only hold the headline and link set above.
        yield {'data': items}

        # Pagination uses fixed offsets (30, 60, ..., 390) under /travel/.
        # NOTE(review): every parsed page yields these same requests again;
        # presumably Scrapy's duplicate filter is relied on to drop the
        # repeats -- confirm against the project settings.
        for i in range(30, 420, 30):
            next_url = "http://www.dailymirror.lk/travel/" + str(i)
            yield scrapy.Request(next_url, callback=self.parse)
Esempio n. 2
0
    def parse(self, response):
        """Parse a listing page and follow each linked article.

        Every ``div.media`` entry that carries a heading link gets a
        ``DailyMirrorSportsItem`` filled with its headline and link. The item
        is attached to a request for the article (``meta['item']``) whose
        callback is ``self.parse_1``. After the entries, a single
        ``{'newsInDetails': items}`` dict referencing those same item objects
        is yielded.

        Args:
            response: The listing-page response to parse.

        Yields:
            One ``Request`` per linked article, then the
            ``{'newsInDetails': items}`` dict.
        """
        items = []
        for news in response.css('div.media'):
            news_url = news.css(
                'h2.media-heading.cat-header a ::attr(href)').extract_first()
            if news_url is None:
                # No heading link in this entry. Building a Request from a
                # None URL raises, which would abort this generator and drop
                # the remaining entries and the summary dict below.
                continue

            item = DailyMirrorSportsItem()
            item['news_headline'] = news.css(
                'h2.media-heading.cat-header a ::text').extract_first()
            item['link'] = news_url

            # Resolve against the page URL so relative hrefs are requestable;
            # absolute hrefs are requested as before.
            yield Request(
                url=response.urljoin(news_url),
                callback=self.parse_1,
                meta={'item': item},
            )
            items.append(item)

        # At this point the items only hold the headline and link set above.
        yield {'newsInDetails': items}
Esempio n. 3
0
    def parse(self, response):
        """Parse a category page, follow its articles, then the next page.

        For each ``#categorycontent`` node whose ``h2`` yields an ``href``, a
        ``DailyMirrorSportsItem`` is filled with the headline and link and
        attached (``meta['item']``) to a request whose callback is
        ``self.parse_1``. After the nodes, a single ``{'data': items}`` dict
        referencing those same item objects is yielded. If the page has an
        ``a.nextpostslink`` link, that page is requested with this method as
        its callback.

        Args:
            response: The category-page response to parse.

        Yields:
            One ``Request`` per linked article, then the ``{'data': items}``
            dict, then optionally a ``scrapy.Request`` for the next page.
        """
        items = []
        for news in response.css('#categorycontent'):
            news_url = news.css('h2 ::attr(href)').extract_first()
            if news_url is None:
                # No link found in this node. Building a Request from a None
                # URL raises, which would abort this generator and drop the
                # summary dict and the next-page request below.
                continue

            item = DailyMirrorSportsItem()
            item['news_headline'] = news.css('h2 ::text').extract_first()
            item['link'] = news_url

            # Resolve against the page URL so relative hrefs are requestable;
            # absolute hrefs are requested as before.
            yield Request(
                url=response.urljoin(news_url),
                callback=self.parse_1,
                meta={'item': item},
            )
            items.append(item)

        # At this point the items only hold the headline and link set above.
        yield {'data': items}

        next_link = response.css('a.nextpostslink ::attr(href)').extract_first()
        if next_link is not None:
            next_url = urljoin(response.url, next_link)
            print("scrpping "+next_url)
            yield scrapy.Request(next_url, callback=self.parse)