Beispiel #1
0
    def parse(self, response):
        """Yield one RssItem for every ``<div>`` found under the response.

        For each div, the title is the first anchor text node inside it and
        the link is the first anchor ``href`` inside it.
        """
        for div in response.xpath('.//div'):
            anchor_text = div.xpath('.//a/text()').extract_first()
            anchor_href = div.xpath('.//a/@href').extract_first()

            item = RssItem()
            item.title = anchor_text
            item.link = anchor_href
            yield item
Beispiel #2
0
    def parse_products(self, response):
        """Yield an RssItem for every element matched by ``.//*``.

        Each item gets the fixed title ``'prueba linkedin'`` and, as its
        link, the first anchor ``href`` found beneath that element.
        """
        for node in response.xpath('.//*'):
            product = RssItem()
            product.title = 'prueba linkedin'
            product.link = node.xpath('.//a/@href').extract_first()
            yield product
Beispiel #3
0
	def parse_products(self, response):
		"""Yield one RssItem per ``<li>`` of the ``itemsList`` product list.

		The item title is the stripped text of the entry's ``h3/a`` link, the
		link is that anchor's ``href`` and the description is the text of the
		``currencyPrice`` span.

		An entry without a title text node is still yielded, with its title
		left unset, so one incomplete entry does not abort the generator and
		drop the remaining entries of the page.
		"""
		items = response.xpath('//ul[contains(@class, "itemsList")]/li')
		for item in items:
			title = item.xpath('.//h3/a/text()').extract_first()
			url = item.xpath('.//h3/a/@href').extract_first()
			price = item.xpath('.//span[@class="currencyPrice"]/text()').extract_first()

			product = RssItem()
			# extract_first() returns None when nothing matches; only strip
			# and assign a title that actually exists.
			if title is not None:
				product.title = title.strip()
			product.link = url
			product.description = price
			yield product
Beispiel #4
0
 def parse(self, response):
     """Yield one RssItem per ``.cff-item`` post authored by the LNP page.

     Author, publication date and link are read from the post's
     ``.cff-page-name``, ``.cff-date`` and ``.cff-viewpost-facebook``
     elements; the guid mirrors the link.  Video posts take their title
     from the poster image's ``alt`` text and use the whole post markup as
     description; other posts take both from ``.cff-post-desc``.

     A post without a date is still yielded (with pubDate left unset) so
     one incomplete post does not abort the remaining posts of the page.
     """
     post_elements = response.css(
         ".cff-item.author-lnp---liberal-national-party")
     for post in post_elements:
         is_video_post = bool(post.css(".cff-video-post"))
         item = RssItem()
         item.author = post.css("div.cff-page-name a::text").get()
         pub_date = post.css(".cff-date::text").get()
         if pub_date is not None:
             item.pubDate = pub_date.strip()
         item.link = post.css(".cff-viewpost-facebook::attr(href)").get()
         item.guid = item.link.link
         if is_video_post:
             item.title = post.css(".cff-poster::attr(alt)").get()
             # Serialize the post to markup; assigning the Selector object
             # itself would not give the description a string, unlike the
             # non-video branch below.
             item.description = post.get()
         else:
             item.title = post.css(".cff-post-desc::text").get()
             item.description = post.css(".cff-post-desc").get()
         yield item
Beispiel #5
0
 def parse_item(self, response):
     """Yield a single RssItem describing the page in ``response``.

     Title comes from the ``<title>`` text, the publication date from the
     ``article:published_time`` meta tag and the description from the
     markup of ``.entry-content``.  Link and guid are both the page URL.
     """
     page_url = response.url
     published = response.css(
         'meta[property="article:published_time"]::attr(content)').get()
     body_html = response.css(".entry-content").get()

     entry = RssItem()
     entry.title = response.css("title::text").get()
     entry.author = "Liberal Party of Western Australia"
     entry.link = page_url
     entry.guid = page_url
     entry.pubDate = published
     entry.description = body_html
     yield entry
Beispiel #6
0
 def parse_item(self, response):
     """Yield a single RssItem describing the page in ``response``.

     Title comes from the ``og:title`` meta tag, the publication date from
     the stripped text of the first ``<h4>``, the author from
     ``div.header h1`` and the description from the concatenated markup of
     the ``<p>`` elements inside the full-width div.  Link and guid are
     both the page URL.
     """
     page_url = response.url
     og_title = response.css(
         'meta[property="og:title"]::attr(content)').get()
     paragraphs = response.css('div[style="width:100%"] p').getall()

     entry = RssItem()
     entry.title = og_title
     entry.link = page_url
     entry.guid = page_url
     entry.pubDate = response.css("h4::text").get().strip()
     entry.author = response.css("div.header h1::text").get()
     entry.description = "".join(paragraphs)
     yield entry
Beispiel #7
0
 def parse_item(self, response):
     """Yield a single RssItem describing the post in ``response``.

     Title comes from the ``og:description`` meta tag and the description
     from the concatenated markup of every ``post_message`` div.  Link and
     guid are both the page URL.
     """
     page_url = response.url
     og_description = response.css(
         'meta[property="og:description"]::attr(content)').get()
     message_blocks = response.css('div[data-testid="post_message"]').getall()

     entry = RssItem()
     entry.title = og_description
     entry.author = "South Australian Labor"
     entry.link = page_url
     entry.guid = page_url
     # NOTE(review): this selector has no ``::text``, so pubDate receives
     # the element's markup rather than its text - confirm this is intended.
     entry.pubDate = response.css(".timestampContent").get()
     entry.description = "".join(message_blocks)
     yield entry
Beispiel #8
0
 def parse_item(self, response):
     """Yield a single RssItem describing the article in ``response``.

     Title comes from the ``og:title`` meta tag, the author is every
     ``.contributor .name`` text joined with " & ", the publication date
     comes from the ``article:published_time`` meta tag and the description
     is the markup of ``.content``.  Link and guid are both the page URL.
     """
     page_url = response.url
     og_title = response.css(
         'meta[property="og:title"]::attr(content)').get()
     contributors = response.css(".contributor .name::text").getall()
     published = response.css(
         'meta[property="article:published_time"]::attr(content)').get()

     entry = RssItem()
     entry.title = og_title
     entry.author = " & ".join(contributors)
     entry.pubDate = published
     entry.link = page_url
     entry.guid = page_url
     entry.description = response.css(".content").get()
     yield entry
Beispiel #9
0
 def parse_item(self, response):
     """Yield a single RssItem describing the article in ``response``.

     The title is the ``<title>`` text with everything after the last
     " - " removed.  The publication date comes from the
     ``article:published_time`` meta tag.  The description is the markup of
     ``.content-body`` with the ``<h2>...</h2>...<time>...</time>`` run
     removed.  Link and guid are both the page URL.
     """
     page_url = response.url
     page_title = response.css("title::text").get()

     entry = RssItem()
     entry.title = page_title.rsplit(" - ", 1)[0]
     entry.author = "Tasmanian Labor"
     entry.link = page_url
     entry.guid = page_url
     entry.pubDate = response.css(
         'meta[property="article:published_time"]::attr(content)').get()

     body_html = response.css(".content-body").get()
     entry.description = re.sub(
         r"<h2>.*?</h2>.*<time>.*?</time>", "", body_html)
     yield entry
Beispiel #10
0
    def parse_item(self, response):
        """Yield a single RssItem describing the news page in ``response``.

        The title is the ``<title>`` text up to the first " | ".  The
        publication date is the quoted timestamp that follows
        ``field_news_date:`` in the page's script text.  The description is
        the concatenated markup of the ``div.rpl-markup__inner p`` elements.
        Link and guid are both the page URL.
        """
        date_pattern = (
            r'field_news_date:"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\+\d{2}:\d{2}"'
        )
        page_url = response.url

        entry = RssItem()
        entry.title = response.css("title::text").get().split(" | ")[0]
        entry.link = page_url
        entry.guid = page_url

        # The match looks like field_news_date:"<timestamp>"; keep what is
        # after the first colon and drop the surrounding quotes.
        date_field = response.css("script::text").re_first(date_pattern)
        quoted_stamp = date_field.split(":", maxsplit=1)[-1]
        entry.pubDate = quoted_stamp.strip('"')

        paragraphs = response.css("div.rpl-markup__inner p").getall()
        entry.description = "".join(paragraphs)
        entry.author = "State Government of Victoria"
        yield entry
Beispiel #11
0
    def parse_item(self, response):
        """Yield a single RssItem describing the page in ``response``.

        The title is the first ``font[size="5"]`` text and the author is all
        ``font[size="6"]`` texts joined with " - ".  The publication date is
        the first "D Month YYYY" match inside ``.paragraph``.  The
        description is the markup of ``div.content-wrap .paragraph`` with
        the bold title heading and the date line removed.  Link and guid
        are both the page URL.
        """
        item = RssItem()
        item.title = response.css('font[size="5"]::text').get()
        item.link = response.url
        item.guid = response.url
        item.author = " - ".join(
            response.css('font[size="6"]::text').getall()).strip()

        re_date = r"\d{1,2} [A-Z][a-z]+ \d{4}"
        item.pubDate = response.css(".paragraph").re_first(re_date)

        description = response.css("div.content-wrap .paragraph").get()
        title_text = item.title.title
        if title_text:
            # Escape the title: it is page text and may contain regex
            # metacharacters such as "(", "?" or "+", which would otherwise
            # break the pattern or make it match the wrong span.
            heading = re.escape(title_text)
            description = re.sub(
                f"<strong>(<span .*?>)?<font .*?>{heading}(<br>)?</font>(</span>)?</strong><br>",
                "",
                description,
            )
        description = re.sub(re_date + "<br><br>", "", description)
        item.description = description
        yield item
Beispiel #12
0
 def parse(self, response):
     """Yield one RssItem per text node found under ``.list-group-item``.

     Each item's title is the extracted text of that node.
     """
     for text_node in response.css('.list-group-item ::text'):
         entry = RssItem()
         entry.title = text_node.extract()
         yield entry
 def __init__(self, *args, **kwargs):
     """Initialise the parent class and create the ``rss`` RssItem.

     All positional and keyword arguments are forwarded unchanged to the
     parent initialiser.
     """
     # Zero-argument super() resolves against the defining class.
     # super(self.__class__, self) would recurse forever once this class
     # is subclassed, because self.__class__ is then the subclass.
     super().__init__(*args, **kwargs)
     self.rss = RssItem()