Python articleTags Exemples, genTags.articleTags Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : gsmarena.py Projet : shail4998/scrapy

    def parse(self, response):
        """Yield one item per ``div.news-item`` node, then follow the next page.

        Every matched node produces a fresh ``AlphaItem`` holding the title,
        description, link, image URL and the result of ``articleTags(title)``.
        Afterwards the listing URL for ``alphaspider.page_no`` is printed and,
        while that counter is below 4, the counter is incremented and a
        follow-up request (handled by this same method) is yielded.  The URL
        is built from the counter value *before* it is incremented.
        """
        # Function-scope import keeps this method self-contained.
        from urllib.parse import urljoin

        for node in response.xpath('//div[@class="news-item"]'):
            title = node.xpath('a/h3/text()').get()
            link = node.xpath('div[1]/a/@href').get()

            # Build a new item for every node: re-using one mutable item for
            # all yields lets later iterations overwrite data that a consumer
            # may still be holding a reference to.
            item = AlphaItem()
            item['title'] = title
            # 'discription' (sic) is the key this code has always written.
            item['discription'] = node.xpath('p/text()').get()
            # .get() yields None when nothing matches; joining only when a
            # link exists avoids a TypeError from 'str' + None.
            item['link'] = (
                urljoin('https://www.gsmarena.com/', link)
                if link is not None else None)
            item['img_link'] = node.xpath('div[1]/a/img/@src').get()
            item['tags'] = articleTags(title)

            yield item

        next_page = 'https://www.gsmarena.com/news.php3?iPage=' + str(
            alphaspider.page_no)
        print(next_page)
        if alphaspider.page_no < 4:
            alphaspider.page_no += 1
            yield response.follow(next_page, callback=self.parse)

Exemple #2

0

Afficher le fichier

    def parse(self, response):
        """Yield one item per ``div.blog-list-blog`` node, then follow the next page.

        Every matched node produces a fresh ``AlphaItem`` holding the title,
        link, image URL and the result of ``articleTags(title)``.  Afterwards
        the listing URL for ``alphaspider.page_no`` is printed and, while that
        counter is below 14, the counter is incremented and a follow-up
        request (handled by this same method) is yielded.  The URL is built
        from the counter value *before* it is incremented.
        """
        for node in response.xpath('//div[@class="blog-list-blog"]'):
            title = node.xpath('figure/a/img/@title').get()

            # Build a new item for every node: re-using one mutable item for
            # all yields lets later iterations overwrite data that a consumer
            # may still be holding a reference to.
            item = AlphaItem()
            item['title'] = title
            item['link'] = node.xpath('figure/a/@href').get()
            item['img_link'] = node.xpath('figure/a/img/@src').get()
            item['tags'] = articleTags(title)

            yield item

        next_page = 'https://www.news18.com/world/page-' + str(
            alphaspider.page_no)
        print(next_page)
        if alphaspider.page_no < 14:
            alphaspider.page_no += 1
            yield response.follow(next_page, callback=self.parse)

Exemple #3

0

Afficher le fichier

Fichier : hindustanTimes.py Projet : shail4998/scrapy

    def parse(self, response):
        """Yield one item per heading node, then follow the next listing page.

        Heading nodes are the ``div`` elements whose class is exactly
        ``"media-heading headingfour"``.  Each one produces a fresh
        ``AlphaItem`` holding the title, description, link, image URL and the
        result of ``articleTags(title)``.  Afterwards the listing URL for
        ``alphaspider.page_no`` is printed and, while that counter is below 5,
        the counter is incremented and a follow-up request (handled by this
        same method) is yielded.  The URL is built from the counter value
        *before* it is incremented.
        """
        headings = response.xpath(
            '//div/div[@class="media-heading headingfour"]')

        for node in headings:
            title = node.xpath('a/text()').get()

            # Build a new item for every node: re-using one mutable item for
            # all yields lets later iterations overwrite data that a consumer
            # may still be holding a reference to.
            item = AlphaItem()
            item['title'] = title
            # 'discription' (sic) is the key this code has always written.
            item['discription'] = node.xpath(
                'following-sibling::div/text()').get()
            item['link'] = node.xpath('a/@href').get()
            # The image is looked up two ancestors above the heading.
            item['img_link'] = node.xpath(
                'parent::div/parent::div/div[1]/div/a/img/@src').get()
            item['tags'] = articleTags(title)

            yield item

        next_page = 'https://www.hindustantimes.com/lok-sabha-elections/news/?pageno=' + str(
            alphaspider.page_no)
        print(next_page)
        if alphaspider.page_no < 5:
            alphaspider.page_no += 1
            yield response.follow(next_page, callback=self.parse)

Exemple #4

0

Afficher le fichier

    def parse(self, response):
        """Yield one item per ``article.item-list`` node, then follow the next page.

        Every matched node produces a fresh ``AlphaItem`` holding the title,
        description, link, image URL and the result of ``articleTags(title)``.
        Afterwards the listing URL for ``alphaspider.page_no`` is printed and,
        while that counter is below 5, the counter is incremented and a
        follow-up request (handled by this same method) is yielded.  The URL
        is built from the counter value *before* it is incremented.
        """
        for node in response.xpath('//article[@class="item-list"]'):
            title = node.xpath('div[2]/h2/a/text()').get()

            # Build a new item for every node: re-using one mutable item for
            # all yields lets later iterations overwrite data that a consumer
            # may still be holding a reference to.
            item = AlphaItem()
            item['title'] = title
            # 'discription' (sic) is the key this code has always written.
            item['discription'] = node.xpath('div[2]/div/p/text()').get()
            item['link'] = node.xpath('div[1]/a/@href').get()
            item['img_link'] = node.xpath('div[1]/a/img/@src').get()
            item['tags'] = articleTags(title)

            yield item

        next_page = 'https://www.siasat.com/category/technology-' + str(
            alphaspider.page_no)
        print(next_page)
        if alphaspider.page_no < 5:
            alphaspider.page_no += 1
            yield response.follow(next_page, callback=self.parse)

Exemple #5

0

Afficher le fichier

Fichier : pinkvilla.py Projet : shail4998/scrapy

    def parse(self, response):
        """Yield one item per teaser node, then follow the next listing page.

        Teaser nodes are the ``div`` elements whose class attribute is exactly
        ``" section-page-teaser-item"`` (the leading space is part of the
        match).  Each one produces a fresh ``AlphaItem`` holding the title,
        link, image URL and the result of ``articleTags(title)``.  Afterwards
        the listing URL for ``alphaspider.page_no`` is printed and, while that
        counter is below 5, the counter is incremented and a follow-up request
        (handled by this same method) is yielded.  The URL is built from the
        counter value *before* it is incremented.
        """
        teasers = response.xpath('//div[@class=" section-page-teaser-item"]')

        for node in teasers:
            title = node.xpath('div[2]/a/@title').get()

            # Build a new item for every node: re-using one mutable item for
            # all yields lets later iterations overwrite data that a consumer
            # may still be holding a reference to.
            item = AlphaItem()
            item['title'] = title
            item['link'] = node.xpath('div[1]/a/@href').get()
            item['img_link'] = node.xpath('div[1]/a/img/@src').get()
            item['tags'] = articleTags(title)

            yield item

        next_page = 'https://www.pinkvilla.com/entertainment/page/' + str(
            alphaspider.page_no)
        print(next_page)
        if alphaspider.page_no < 5:
            alphaspider.page_no += 1
            yield response.follow(next_page, callback=self.parse)

Exemple #6

0

Afficher le fichier

Fichier : NDTV_spider.py Projet : shail4998/scrapy

    def parse(self, response):
        """Yield one item per ``div.new_storylising_img`` node, then follow the next page.

        Every matched node produces a fresh ``AlphaItem`` holding the title,
        description, link, image URL and the result of ``articleTags(title)``.
        Afterwards the listing URL for ``alphaspider.page_no`` is printed and,
        while that counter is below 5, the counter is incremented and a
        follow-up request (handled by this same method) is yielded.  The URL
        is built from the counter value *before* it is incremented.
        """
        for node in response.xpath('//div[@class="new_storylising_img"]'):
            title = node.xpath('a/@title').get()

            # Build a new item for every node: re-using one mutable item for
            # all yields lets later iterations overwrite data that a consumer
            # may still be holding a reference to.
            item = AlphaItem()
            item['title'] = title
            # 'discription' (sic) is the key this code has always written;
            # the text lives in a sibling of the image container.
            item['discription'] = node.xpath(
                'following-sibling::div/div[3]/text()').get()
            item['link'] = node.xpath('a/@href').get()
            item['img_link'] = node.xpath('a/img/@src').get()
            item['tags'] = articleTags(title)

            yield item

        next_page = 'https://www.ndtv.com/world-news/page-' + str(
            alphaspider.page_no)
        print(next_page)
        if alphaspider.page_no < 5:
            alphaspider.page_no += 1
            yield response.follow(next_page, callback=self.parse)

Exemple #7

0

Afficher le fichier

    def parse(self, response):
        """Yield one item per ``a.list-item-link`` node, then follow the next page.

        Every matched anchor produces a fresh ``AlphaItem`` holding the title,
        link, image URL, description and the result of ``articleTags(title)``.
        Afterwards the listing URL for ``alphaspider.page_no`` is printed and,
        while that counter is below 5, the counter is incremented and a
        follow-up request (handled by this same method) is yielded.  The URL
        is built from the counter value *before* it is incremented.
        """
        for node in response.xpath('//a[@class="list-item-link"]'):
            title = node.xpath('div[1]/img/@title').get()

            # Build a new item for every node: re-using one mutable item for
            # all yields lets later iterations overwrite data that a consumer
            # may still be holding a reference to.
            item = AlphaItem()
            item['title'] = title
            # The matched node is the anchor itself, so read its own href.
            item['link'] = node.xpath('@href').get()
            item['img_link'] = node.xpath('div[1]/img/@src').get()
            item['tags'] = articleTags(title)
            # 'discription' (sic) is the key this code has always written.
            item['discription'] = node.xpath('div[2]/div/text()').get()

            yield item

        next_page = 'https://www.firstpost.com/category/india/page/' + str(
            alphaspider.page_no)
        print(next_page)
        if alphaspider.page_no < 5:
            alphaspider.page_no += 1
            yield response.follow(next_page, callback=self.parse)

Exemple #8

0

Afficher le fichier

Fichier : indiaWeb.py Projet : shail4998/scrapy

    def parse(self, response):
        """Yield one item per ``div.list-item`` node, then follow the next page.

        Every matched node produces a fresh ``AlphaItem`` holding the title,
        description, link, image URL and the result of ``articleTags(title)``.
        Afterwards the listing URL for ``alphaspider.page_no`` is printed and,
        while that counter is below 5, the counter is incremented and a
        follow-up request (handled by this same method) is yielded.  The URL
        is built from the counter value *before* it is incremented.
        """
        for node in response.xpath('//div[@class="list-item"]'):
            title = node.xpath('article/div[2]/h3/a/text()').get()

            # Build a new item for every node: re-using one mutable item for
            # all yields lets later iterations overwrite data that a consumer
            # may still be holding a reference to.
            item = AlphaItem()
            item['title'] = title
            # 'discription' (sic) is the key this code has always written.
            item['discription'] = node.xpath(
                'article/div[2]/div[1]/div/text()').get()
            item['link'] = node.xpath('article/div[1]/a/@href').get()
            # The image source is read from the <noscript> fallback markup.
            item['img_link'] = node.xpath(
                'article/div[1]/a/noscript/img/@src').get()
            item['tags'] = articleTags(title)

            yield item

        next_page = 'https://www.indianweb2.com/category/technology/page/' + str(
            alphaspider.page_no) + '/'
        print(next_page)
        if alphaspider.page_no < 5:
            alphaspider.page_no += 1
            yield response.follow(next_page, callback=self.parse)

Exemple #9

0

Afficher le fichier

    def parse(self, response):
        """Yield one item per article-landing node found in the response.

        Nodes are the ``div`` elements whose class attribute is exactly
        ``"m-article-landing  m-block-link"`` (two spaces between the class
        names are part of the match).  Each one produces a fresh ``AlphaItem``
        holding the title, description, link, image URL and the result of
        ``articleTags(title)``.  This method does not request further pages.
        """
        articles = response.xpath(
            '//div[@class="m-article-landing  m-block-link"]')

        for node in articles:
            title = node.xpath('div[2]/h2/a/@title').get()

            # Build a new item for every node: re-using one mutable item for
            # all yields lets later iterations overwrite data that a consumer
            # may still be holding a reference to.
            item = AlphaItem()
            item['title'] = title
            # 'discription' (sic) is the key this code has always written.
            item['discription'] = node.xpath('div[2]/h3/text()').get()
            item['link'] = node.xpath('div[2]/h2/a/@href').get()
            item['img_link'] = node.xpath(
                'div[1]/div/a/picture/img/@src').get()
            item['tags'] = articleTags(title)

            yield item

Exemple #10

0

Afficher le fichier

    def parse(self, response):
        """Yield one item per ``div.eachStory`` node found in the response.

        Every matched node produces a fresh ``AlphaItem`` holding the title,
        link (resolved against ``https://economictimes.indiatimes.com``),
        image URL, description and the result of ``articleTags(title)``.
        This method does not request further pages.
        """
        # Function-scope import keeps this method self-contained.
        from urllib.parse import urljoin

        for node in response.xpath('//div[@class="eachStory"]'):
            title = node.xpath('h3/a/text()').get()
            link = node.xpath('a/@href').get()

            # Build a new item for every node: re-using one mutable item for
            # all yields lets later iterations overwrite data that a consumer
            # may still be holding a reference to.
            item = AlphaItem()
            item['title'] = title
            # .get() yields None when nothing matches; joining only when a
            # link exists avoids a TypeError from 'str' + None.  urljoin also
            # inserts the '/' separator if the href lacks a leading slash.
            item['link'] = (
                urljoin('https://economictimes.indiatimes.com', link)
                if link is not None else None)
            item['img_link'] = node.xpath('a/span/img/@src').get()
            # 'discription' (sic) is the key this code has always written.
            item['discription'] = node.xpath('p/text()').get()
            item['tags'] = articleTags(title)

            yield item

Exemple #11

0

Afficher le fichier

    def parse(self, response):
        """Yield one item per compact entry box found in the response.

        Entry boxes are the ``div`` elements whose class attribute is exactly
        ``"c-entry-box--compact c-entry-box--compact--article"``.  Each one
        produces a fresh ``AlphaItem`` holding the title, description, link,
        image URL (resolved against ``https://www.barcablaugranes.com/``) and
        the result of ``articleTags(title)``.  This method does not request
        further pages.
        """
        # Function-scope import keeps this method self-contained.
        from urllib.parse import urljoin

        entries = response.xpath(
            '//div[@class="c-entry-box--compact c-entry-box--compact--article"]'
        )

        for node in entries:
            title = node.xpath('div/h2/a/text()').get()
            img_link = node.xpath('a[1]/div/img/@src').get()

            # Build a new item for every node: re-using one mutable item for
            # all yields lets later iterations overwrite data that a consumer
            # may still be holding a reference to.
            item = AlphaItem()
            item['title'] = title
            # 'discription' (sic) is the key this code has always written.
            item['discription'] = node.xpath('div/p/text()').get()
            item['link'] = node.xpath('div/h2/a/@href').get()
            # .get() yields None when nothing matches; joining only when a
            # source exists avoids a TypeError from 'str' + None.  urljoin
            # leaves an already-absolute image URL untouched instead of
            # gluing the site prefix in front of it.
            item['img_link'] = (
                urljoin('https://www.barcablaugranes.com/', img_link)
                if img_link is not None else None)
            item['tags'] = articleTags(title)

            yield item

Exemple #12

0

Afficher le fichier

    def parse(self, response):
        """Yield one item per article box, then follow the next listing page.

        Article boxes are the ``article`` elements whose class attribute is
        exactly ``"bh-cm-box bh-box-article hentry"``.  Each one produces a
        fresh ``AlphaItem`` holding the title, link, image URL and the result
        of ``articleTags(title)``.  Afterwards the listing URL for
        ``alphaspider.page_no`` is printed and, while that counter is below 5,
        the counter is incremented and a follow-up request (handled by this
        same method) is yielded.  The URL is built from the counter value
        *before* it is incremented.
        """
        articles = response.xpath(
            '//article[@class="bh-cm-box bh-box-article hentry"]')

        for node in articles:
            title = node.xpath('h3/a/text()').get()

            # Build a new item for every node: re-using one mutable item for
            # all yields lets later iterations overwrite data that a consumer
            # may still be holding a reference to.
            item = AlphaItem()
            item['title'] = title
            item['link'] = node.xpath('h3/a/@href').get()
            item['img_link'] = node.xpath('figure/a/img/@src').get()
            item['tags'] = articleTags(title)

            yield item

        next_page = 'https://www.bollywoodhungama.com/bollywood/page/' + str(
            alphaspider.page_no)
        print(next_page)
        if alphaspider.page_no < 5:
            alphaspider.page_no += 1
            yield response.follow(next_page, callback=self.parse)