def parse_item(self, response):
    """Build one ``RssItem`` from a page response and yield it.

    The title comes from the ``og:title`` meta tag and the publication date
    from the text of ``.title h6``. The description is the joined result of
    extracting every ``.mr-content`` match. Link and GUID are both set to
    the page URL, and the author is the fixed string "Liberal Victoria".

    Args:
        response: Page response exposing ``css()`` and ``url`` (presumably
            a Scrapy response -- confirm against the enclosing spider).

    Yields:
        The populated ``RssItem``.
    """
    entry = RssItem()
    page_url = response.url

    og_title = response.css('meta[property="og:title"]::attr(content)')
    entry.title = og_title.get()

    # The same URL serves as both the link and the unique identifier.
    entry.link = page_url
    entry.guid = page_url

    date_text = response.css(".title h6::text")
    entry.pubDate = date_text.get()
    entry.author = "Liberal Victoria"

    content_parts = response.css(".mr-content").extract()
    entry.description = "".join(content_parts)

    yield entry
def parse_item(self, response):
    """Build one ``RssItem`` from a page response and yield it.

    The title is the part of the ``<title>`` text after the first " - ",
    stripped of surrounding whitespace. The publication date is the stripped
    text of ``div.newsCreatedDate``. The author is the ``alt`` text of every
    ``img.ministersPic`` joined with " & ", and the description is the first
    ``div.ms-rtestate-field`` match. Link and GUID are both the page URL.

    A missing ``<title>`` or date element leaves the corresponding field as
    ``None`` instead of raising ``AttributeError``.

    Args:
        response: Page response exposing ``css()`` and ``url`` (presumably
            a Scrapy response -- confirm against the enclosing spider).

    Yields:
        The populated ``RssItem``.
    """
    item = RssItem()

    # ``get()`` returns None when the selector matches nothing, so guard
    # before calling string methods on the result.
    raw_title = response.css("title::text").get()
    if raw_title is None:
        item.title = None
    else:
        # Keep only what follows the first " - " separator; when there is
        # no separator the whole title is kept.
        item.title = raw_title.split(" - ", 1)[-1].strip()

    item.link = response.url
    item.guid = response.url

    raw_date = response.css("div.newsCreatedDate::text").get()
    item.pubDate = None if raw_date is None else raw_date.strip()

    item.author = " & ".join(
        response.css("img.ministersPic::attr(alt)").getall()
    )
    item.description = response.css("div.ms-rtestate-field").get()
    yield item
def parse_item(self, response):
    """Build one ``RssItem`` from a page response and return it.

    Title, publication date and author are read from the ``dcterms.title``,
    ``dcterms.issued`` and ``article.minister`` meta tags respectively. The
    description is every ``<p>`` under ``div.news-detail__summary`` followed
    by every ``<p>`` under ``div.news-detail__body``, concatenated. Link and
    GUID are both the page URL.

    Args:
        response: Page response exposing ``css()`` and ``url`` (presumably
            a Scrapy response -- confirm against the enclosing spider).

    Returns:
        The populated ``RssItem`` (returned directly, not yielded).
    """

    def meta_content(name):
        # Content attribute of the first <meta name="..."> match, or None.
        return response.css(f'meta[name="{name}"]::attr(content)').get()

    entry = RssItem()
    entry.title = meta_content("dcterms.title")
    entry.link = response.url
    entry.guid = response.url
    entry.pubDate = meta_content("dcterms.issued")
    entry.author = meta_content("article.minister")

    # Summary paragraphs come first, then the body paragraphs.
    paragraphs = response.css("div.news-detail__summary p").getall()
    paragraphs.extend(response.css("div.news-detail__body p").getall())
    entry.description = "".join(paragraphs)

    return entry
def parse_item(self, response):
    """Build one ``RssItem`` from a page response and yield it.

    The title is the part of the ``<title>`` text before the first " | ".
    The publication date comes from the ``dcterms.date`` meta tag and the
    description is the first ``div.nsw-wysiwyg-content`` match. The author
    is the last non-blank text node found under
    ``div.standard-header__released_by div``, falling back to
    "NSW Government" when there is none. Link and GUID are both the page
    URL.

    A missing ``<title>`` leaves ``item.title`` as ``None`` instead of
    raising ``AttributeError``.

    Args:
        response: Page response exposing ``css()`` and ``url`` (presumably
            a Scrapy response -- confirm against the enclosing spider).

    Yields:
        The populated ``RssItem``.
    """
    item = RssItem()

    # ``get()`` returns None when the selector matches nothing, so guard
    # before splitting.
    raw_title = response.css("title::text").get()
    item.title = None if raw_title is None else raw_title.split(" | ")[0]

    item.link = response.url
    item.guid = response.url
    item.pubDate = response.css(
        'meta[name="dcterms.date"]::attr(content)'
    ).get()
    item.description = response.css("div.nsw-wysiwyg-content").get()

    # Drop whitespace-only text nodes so a blank trailing node cannot
    # produce an empty author in place of the default.
    released_by = [
        text.strip()
        for text in response.css(
            "div.standard-header__released_by div::text"
        ).getall()
        if text.strip()
    ]
    item.author = released_by[-1] if released_by else "NSW Government"
    yield item
def parse_item(self, response):
    """Build one ``RssItem`` from a page response and yield it.

    The title comes from the ``DCTERMS.title`` meta tag (stripped). The
    publication date is the quoted value following a ``"datePublished":``
    key found in the text of any ``<script>`` element. The author is the
    text of every ``p.statement-ministers`` joined with " & ". The
    description is the ``div div p`` paragraphs with the leading ones
    skipped (see the cutoff note in the body). Link and GUID are both the
    page URL.

    A missing title tag or ``datePublished`` entry leaves the corresponding
    field as ``None`` instead of raising ``AttributeError``.

    Args:
        response: Page response exposing ``css()`` and ``url`` (presumably
            a Scrapy response -- confirm against the enclosing spider).

    Yields:
        The populated ``RssItem``.
    """
    item = RssItem()

    # ``get()`` returns None when the selector matches nothing, so guard
    # before stripping.
    raw_title = response.css(
        'meta[name="DCTERMS.title"]::attr(content)'
    ).get()
    item.title = None if raw_title is None else raw_title.strip()

    item.link = response.url
    item.guid = response.url

    # Capture only the quoted value. ``[^"]*`` stops at the closing quote
    # even when the whole JSON object sits on one line, and ``\s*`` accepts
    # any spacing after the colon. With a single capture group,
    # ``re_first`` returns that group, or None when there is no match.
    published = response.css("script::text").re_first(
        r'"datePublished":\s*"([^"]*)"'
    )
    item.pubDate = None if published is None else published.strip()

    author = response.css("p.statement-ministers::text").getall()
    item.author = " & ".join(author)

    description = response.css("div div p").getall()
    # Skip the leading paragraphs: two by default (publish date & author),
    # plus one more per author entry when there is more than one.
    cutoff = 2
    if len(author) > 1:
        cutoff += len(author)
    item.description = "".join(description[cutoff:])
    yield item