Python RssItem.link Examples

Programming Language: Python

Namespace/Package Name: scrapy_rss.items

Class/Type: RssItem

Method/Function: link

Examples at hotexamples.com: 5

Python RssItem.link - 5 examples found. These are the top-rated real-world Python examples of scrapy_rss.items.RssItem.link, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

RssItem(26)

guid(8)

title(8)

description(7)

author(5)

link(5)

pubDate(5)

category(4)

element(1)

Example #1

Show file

 def parse_item(self, response):
     """Build one RssItem from the scraped page and yield it.

     The title is read from the ``og:title`` meta tag, link and guid are
     both the response URL, the publication date is the first text match
     of ``.title h6``, the author is the fixed string "Liberal Victoria",
     and the description is every ``.mr-content`` match concatenated.
     """
     page_url = response.url
     entry = RssItem()
     entry.title = response.css('meta[property="og:title"]::attr(content)').get()
     entry.link = page_url
     entry.guid = page_url
     entry.pubDate = response.css(".title h6::text").get()
     entry.author = "Liberal Victoria"
     content_blocks = response.css(".mr-content").extract()
     entry.description = "".join(content_blocks)
     yield entry

Example #2

Show file

File: wa_gov_spider.py Project: gov-rss/scrape

 def parse_item(self, response):
     """Build one RssItem from the scraped page and yield it.

     The title is the ``<title>`` text after its first " - " separator
     (the whole text when there is no separator), stripped. Link and guid
     are both the response URL. The author is the ``alt`` text of every
     ``img.ministersPic`` joined with " & ".
     """
     entry = RssItem()
     page_title = response.css("title::text").get()
     entry.title = page_title.split(" - ", 1)[-1].strip()
     # The URL serves as both the link and the unique identifier.
     entry.link = entry.guid = response.url
     created = response.css("div.newsCreatedDate::text").get()
     entry.pubDate = created.strip()
     minister_names = response.css("img.ministersPic::attr(alt)").getall()
     entry.author = " & ".join(minister_names)
     entry.description = response.css("div.ms-rtestate-field").get()
     yield entry

Example #3

Show file

File: sa_prem_spider.py Project: gov-rss/scrape

 def parse_item(self, response):
     """Build one RssItem from the scraped page and return it.

     Title, publication date and author come from the ``dcterms.title``,
     ``dcterms.issued`` and ``article.minister`` meta tags. Link and guid
     are both the response URL. The description is the summary paragraphs
     followed by the body paragraphs, concatenated without a separator.
     """

     def meta_content(name):
         # First ``content`` attribute of the named <meta> tag, or None.
         return response.css(f'meta[name="{name}"]::attr(content)').get()

     entry = RssItem()
     entry.title = meta_content("dcterms.title")
     entry.link = entry.guid = response.url
     entry.pubDate = meta_content("dcterms.issued")
     entry.author = meta_content("article.minister")
     summary_paragraphs = response.css("div.news-detail__summary p").getall()
     body_paragraphs = response.css("div.news-detail__body p").getall()
     entry.description = "".join([*summary_paragraphs, *body_paragraphs])
     return entry

Example #4

Show file

File: nsw_prem_spider.py Project: gov-rss/scrape

    def parse_item(self, response):
        """Build one RssItem from the scraped page and yield it.

        The title is the ``<title>`` text before its first " | " separator.
        Link and guid are both the response URL. The author is the last
        text node under ``div.standard-header__released_by div``, stripped,
        or "NSW Government" when that selector matches nothing.
        """
        entry = RssItem()
        page_title = response.css("title::text").get()
        entry.title = page_title.split(" | ")[0]
        entry.link = entry.guid = response.url
        entry.pubDate = response.css(
            'meta[name="dcterms.date"]::attr(content)'
        ).get()
        entry.description = response.css("div.nsw-wysiwyg-content").get()

        released_by = response.css(
            "div.standard-header__released_by div::text"
        ).getall()
        entry.author = released_by[-1].strip() if released_by else "NSW Government"

        yield entry

Example #5

Show file

 def parse_item(self, response):
     """Build one RssItem from the scraped page and yield it.

     The title is the stripped ``DCTERMS.title`` meta content. Link and
     guid are both the response URL. The publication date is the value of
     the first ``"datePublished": "..."`` pair found in any script text.
     The author is every ``p.statement-ministers`` text joined with " & ".
     The description is the ``div div p`` paragraphs after a leading
     cutoff (see the comments in the body).

     Raises:
         AttributeError: if the title meta tag or the ``datePublished``
             pair is missing, because the selector then returns None.
     """
     item = RssItem()
     item.title = (
         response.css('meta[name="DCTERMS.title"]::attr(content)').get().strip()
     )
     item.link = response.url
     item.guid = response.url
     # Capture only the quoted value. A greedy ``.*`` would run to the last
     # quote on the line, so a script that puts several keys on one line
     # would leak the following keys and values into pubDate. With one
     # capture group, re_first returns just that group.
     item.pubDate = (
         response.css("script::text")
         .re_first(r'"datePublished": "([^"]*)"')
         .strip(" ")
     )
     author = response.css("p.statement-ministers::text").getall()
     item.author = " & ".join(author)
     description = response.css("div div p").getall()
     cutoff = 2  # publish date & author
     # With several authors, skip len(author) further paragraphs.
     # NOTE(review): presumably each minister gets an extra paragraph on the
     # page; confirm against the site markup.
     if len(author) > 1:
         cutoff += len(author)
     item.description = "".join(description[cutoff:])
     yield item