예제 #1
0
 def parse(self, response):
     """Turn the blog-post header links in ``response`` into NewsRelease items.

     The anchors matched by the CSS path below are sliced with
     ``[:SCRAPE_LIMIT]`` (value read from the spider settings). For each
     remaining anchor one NewsRelease is produced whose ``title`` is the
     first extracted text and whose ``link`` is the raw ``href`` value.

     Returns:
         A list of NewsRelease items, in page order.
     """
     anchors = response.css(
         'div.blog-post-row div.node-internal-blog-post div.group-header '
         'div div.field-items div a')
     limit = self.settings.attributes['SCRAPE_LIMIT'].value
     releases = []
     for anchor in anchors[:limit]:
         release = NewsRelease()
         release['title'] = anchor.css('::text').extract_first()
         release['link'] = anchor.css('::attr(href)').extract_first()
         releases.append(release)
     return releases
예제 #2
0
 def parse(self, response):
     """Build NewsRelease items from the article title links in ``response``.

     Selects ``div.node-article div.content a.title-link`` anchors, slices
     the selection with ``[:SCRAPE_LIMIT]`` (read from the spider settings)
     and returns a list with one item per anchor: ``title`` is the first
     extracted text and ``link`` is the ``href`` prefixed with
     ``https://www.energy.gov``.

     Anchors without an ``href`` are skipped. Concatenating the prefix with
     ``None`` would otherwise raise ``TypeError`` and lose every item on
     the page.
     """
     limit = self.settings.attributes['SCRAPE_LIMIT'].value
     items = []
     for element in response.css('div.node-article div.content a.title-link')[:limit]:
         href = element.css('::attr(href)').extract_first()
         if href is None:
             # Nothing to link to, so there is no usable release here.
             continue
         item = NewsRelease()
         item['title'] = element.css('::text').extract_first()
         item['link'] = "https://www.energy.gov" + href
         items.append(item)
     return items
예제 #3
0
 def parse(self, response):
     """Build NewsRelease items from the custom-title links in ``response``.

     Selects the ``h2 a`` anchors inside the
     ``div.views-field-field-custom-title`` list entries, slices them with
     ``[:SCRAPE_LIMIT]`` (read from the spider settings) and returns a list
     of items. ``title`` is the first extracted text; ``link`` is the
     ``href`` prefixed with ``https://www.ed.gov``.

     Anchors that have no ``href`` are skipped so that a single malformed
     entry cannot raise ``TypeError`` (``str + None``) and abort the page.
     """
     limit = self.settings.attributes['SCRAPE_LIMIT'].value
     items = []
     for element in response.css('div.item-list ul li div.views-field-field-custom-title h2 a')[:limit]:
         href = element.css('::attr(href)').extract_first()
         if href is None:
             # Without a target the entry cannot be turned into a link.
             continue
         item = NewsRelease()
         item['title'] = element.css('::text').extract_first()
         item['link'] = "https://www.ed.gov" + href
         items.append(item)
     return items
예제 #4
0
 def parse(self, response):
     """Turn the ``div.genlink`` anchors in ``response`` into NewsRelease items.

     The matched anchors are sliced with ``[:SCRAPE_LIMIT]`` (value read
     from the spider settings). Each remaining anchor yields one item whose
     ``title`` is the first extracted text and whose ``link`` is the
     ``href`` exactly as it appears in the page.

     Returns:
         A list of NewsRelease items, in page order.
     """
     anchors = response.css('div.hudpagepad div.genlink a')
     limit = self.settings.attributes['SCRAPE_LIMIT'].value
     releases = []
     for anchor in anchors[:limit]:
         release = NewsRelease()
         release['title'] = anchor.css('::text').extract_first()
         release['link'] = anchor.css('::attr(href)').extract_first()
         releases.append(release)
     return releases
예제 #5
0
 def parse(self, response):
     """Build NewsRelease items from the post-title links in ``response``.

     Selects ``h2.post-title a`` anchors, slices them with
     ``[:SCRAPE_LIMIT]`` (read from the spider settings) and returns a list
     of items. ``title`` is the first extracted text; ``link`` is the
     ``href`` prefixed with ``https://blog.dol.gov``.

     Anchors lacking an ``href`` are skipped instead of triggering a
     ``TypeError`` from ``str + None`` that would discard the whole page.
     """
     limit = self.settings.attributes['SCRAPE_LIMIT'].value
     items = []
     for element in response.css('h2.post-title a')[:limit]:
         href = element.css('::attr(href)').extract_first()
         if href is None:
             # No href means no post to point at.
             continue
         item = NewsRelease()
         item['title'] = element.css('::text').extract_first()
         item['link'] = "https://blog.dol.gov" + href
         items.append(item)
     return items
예제 #6
0
 def parse(self, response):
     """Build NewsRelease items from the press table rows in ``response``.

     Uses XPath to pick the anchor in the second cell of every
     ``datarow`` row of the ``t-press`` table, slices the selection with
     ``[:SCRAPE_LIMIT]`` (read from the spider settings) and returns a list
     of items. ``title`` is the first extracted text; ``link`` is the
     ``href`` prefixed with ``https://www.treasury.gov``.

     Anchors with no ``href`` are skipped; otherwise ``str + None`` would
     raise ``TypeError`` and abort the page.
     """
     limit = self.settings.attributes['SCRAPE_LIMIT'].value
     items = []
     for element in response.xpath("//table[@class='t-press']/tr[contains(@class, 'datarow')]/td[2]/a")[:limit]:
         href = element.css('::attr(href)').extract_first()
         if href is None:
             # Row has an anchor without a target; nothing to report.
             continue
         item = NewsRelease()
         item['title'] = element.css('::text').extract_first()
         item['link'] = "https://www.treasury.gov" + href
         items.append(item)
     return items
 def parse(self, response):
     """Schedule a follow-up request for each "read" link in ``response``.

     Takes the ``div.event-desc a.read`` anchors, keeps the slice
     ``[-SCRAPE_LIMIT:]`` (the tail of the selection) and returns a list of
     ``scrapy.Request`` objects whose callback is ``self.parse_title``.
     Each request carries a NewsRelease, with ``link`` already filled in,
     under ``meta['item']``.

     Anchors without an ``href`` are skipped and hrefs are resolved with
     ``response.urljoin``. ``scrapy.Request`` rejects both ``None`` and
     scheme-less URLs, and either one previously aborted the whole page.
     """
     # NOTE(review): with SCRAPE_LIMIT == 0 the slice [-0:] keeps every
     # anchor rather than none - confirm that is intended.
     limit = self.settings.attributes['SCRAPE_LIMIT'].value
     items = []
     for element in response.css("div.event-desc a.read")[-limit:]:
         href = element.css('::attr(href)').extract_first()
         if href is None:
             continue
         item = NewsRelease()
         # Absolute hrefs pass through urljoin unchanged.
         item['link'] = response.urljoin(href)
         request = scrapy.Request(item['link'], callback=self.parse_title)
         request.meta['item'] = item
         items.append(request)
     return items
예제 #8
0
 def parse(self, response):
     """Schedule a follow-up request for every ``div.field-item a`` link.

     Returns a list of ``scrapy.Request`` objects, one per matched anchor,
     with ``self.parse_west_wing_read_title`` as callback. Each request
     carries a NewsRelease, with ``link`` already filled in, under
     ``meta['item']``. No SCRAPE_LIMIT slice is applied here.

     Anchors without an ``href`` are skipped and hrefs are resolved with
     ``response.urljoin``. ``scrapy.Request`` raises on ``None`` and on
     scheme-less URLs, which previously aborted the whole page.
     """
     items = []
     for element in response.css('div.field-item a'):
         href = element.css('::attr(href)').extract_first()
         if href is None:
             continue
         item = NewsRelease()
         # Absolute hrefs pass through urljoin unchanged.
         item['link'] = response.urljoin(href)
         request = scrapy.Request(item['link'],
                                  callback=self.parse_west_wing_read_title)
         request.meta['item'] = item
         items.append(request)
     return items
 def parse(self, response):
     """Schedule a follow-up request for each result article in ``response``.

     For every ``div.page-results__wrap article`` element the first
     ``href`` found on it is used as the target of a ``scrapy.Request``
     with ``self.whitehouse_news_title`` as callback. Each request carries
     a NewsRelease, with ``link`` already filled in, under
     ``meta['item']``. No SCRAPE_LIMIT slice is applied here.

     Articles without any ``href`` are skipped and hrefs are resolved with
     ``response.urljoin``. ``scrapy.Request`` raises on ``None`` and on
     scheme-less URLs, which previously aborted the whole page.
     """
     items = []
     for element in response.css('div.page-results__wrap article'):
         href = element.css('::attr(href)').extract_first()
         if href is None:
             continue
         item = NewsRelease()
         # Absolute hrefs pass through urljoin unchanged.
         item['link'] = response.urljoin(href)
         request = scrapy.Request(item['link'],
                                  callback=self.whitehouse_news_title)
         request.meta['item'] = item
         items.append(request)
     return items
 def parse(self, response):
     """Build NewsRelease items from the press-release table in ``response``.

     Selects the title-cell anchors of the
     ``div.view-display-id-page_press_releases`` table, slices them with
     ``[:SCRAPE_LIMIT]`` (read from the spider settings) and returns a list
     of items. ``title`` is the first extracted text; ``link`` is the
     ``href`` prefixed with ``https://www.transportation.gov``.

     Anchors without an ``href`` are skipped; ``str + None`` would
     otherwise raise ``TypeError`` and abort the page.
     """
     limit = self.settings.attributes['SCRAPE_LIMIT'].value
     items = []
     for element in response.css(
             'div.view-display-id-page_press_releases div table tbody tr td.views-field-title a'
     )[:limit]:
         href = element.css('::attr(href)').extract_first()
         if href is None:
             # Title cell without a target; nothing to report.
             continue
         item = NewsRelease()
         item['title'] = element.css('::text').extract_first()
         item['link'] = "https://www.transportation.gov" + href
         items.append(item)
     return items
예제 #11
0
 def parse(self, response):
     """Build NewsRelease items from the ``l-wrap`` links in ``response``.

     Uses XPath to pick ``a[@target='_self']`` anchors that sit either
     directly inside a ``div.l-wrap`` or inside a ``p`` child of it, slices
     the selection with ``[:SCRAPE_LIMIT]`` (read from the spider settings)
     and returns a list of items. ``title`` is the first extracted text;
     ``link`` is the ``href`` prefixed with ``https://www.state.gov``.

     Anchors without an ``href`` are skipped so ``str + None`` cannot
     raise ``TypeError`` and abort the page.
     """
     limit = self.settings.attributes['SCRAPE_LIMIT'].value
     items = []
     for element in response.xpath(
             "//div[@class='l-wrap']/a[@target='_self']|//div[@class='l-wrap']/p/a[@target='_self']"
     )[:limit]:
         href = element.css('::attr(href)').extract_first()
         if href is None:
             # No target to link to.
             continue
         item = NewsRelease()
         item['title'] = element.css('::text').extract_first()
         item['link'] = "https://www.state.gov" + href
         items.append(item)
     return items
예제 #12
0
 def parse(self, response):
     """Build NewsRelease items from the speech-title links in ``response``.

     Selects ``p.speech-title a`` anchors, slices them with
     ``[:SCRAPE_LIMIT]`` (read from the spider settings) and returns a list
     of items. ``title`` is the first extracted text; ``link`` is the
     ``href`` appended to ``https://www.va.gov/opa/pressrel/``.

     Anchors without an ``href`` are skipped; ``str + None`` would
     otherwise raise ``TypeError`` and abort the page.
     """
     limit = self.settings.attributes['SCRAPE_LIMIT'].value
     items = []
     for element in response.css('p.speech-title a')[:limit]:
         href = element.css('::attr(href)').extract_first()
         if href is None:
             # No target to link to.
             continue
         item = NewsRelease()
         item['title'] = element.css('::text').extract_first()
         item['link'] = "https://www.va.gov/opa/pressrel/" + href
         items.append(item)
     return items
예제 #13
0
 def parse(self, response):
     """Build NewsRelease items from the press-post headings in ``response``.

     Selects ``article.press-post h3 a`` anchors, slices them with
     ``[:SCRAPE_LIMIT]`` (read from the spider settings) and returns a list
     of items. ``title`` is the first extracted text; ``link`` is the
     ``href`` prefixed with ``https://www.donaldjtrump.com``.

     Anchors without an ``href`` are skipped; ``str + None`` would
     otherwise raise ``TypeError`` and abort the page.
     """
     limit = self.settings.attributes['SCRAPE_LIMIT'].value
     items = []
     for element in response.css('article.press-post h3 a')[:limit]:
         href = element.css('::attr(href)').extract_first()
         if href is None:
             # No target to link to.
             continue
         item = NewsRelease()
         item['title'] = element.css('::text').extract_first()
         item['link'] = "https://www.donaldjtrump.com" + href
         items.append(item)
     return items
 def parse(self, response):
     """Build NewsRelease items from the blog header titles in ``response``.

     Selects ``div.blog-header h1.title a`` anchors, slices them with
     ``[:SCRAPE_LIMIT]`` (read from the spider settings) and returns a list
     of items. ``title`` is the first extracted text; ``link`` is the
     ``href`` prefixed with ``https://www.transportation.gov``.

     Anchors without an ``href`` are skipped; ``str + None`` would
     otherwise raise ``TypeError`` and abort the page.
     """
     limit = self.settings.attributes['SCRAPE_LIMIT'].value
     items = []
     for element in response.css('div.blog-header h1.title a')[:limit]:
         href = element.css('::attr(href)').extract_first()
         if href is None:
             # No target to link to.
             continue
         item = NewsRelease()
         item['title'] = element.css('::text').extract_first()
         item['link'] = "https://www.transportation.gov" + href
         items.append(item)
     return items
예제 #15
0
 def parse(self, response):
     """Build NewsRelease items from the newsroom table in ``response``.

     Selects the title-cell anchors of the ``div.view-about-newsroom``
     table, slices them with ``[:SCRAPE_LIMIT]`` (read from the spider
     settings) and returns a list of items. ``title`` is the first
     extracted text; ``link`` is the ``href`` prefixed with
     ``https://www.sba.gov``.

     Anchors without an ``href`` are skipped; ``str + None`` would
     otherwise raise ``TypeError`` and abort the page.
     """
     limit = self.settings.attributes['SCRAPE_LIMIT'].value
     items = []
     for element in response.css(
             'div.view-about-newsroom div table tbody tr td.views-field-title a'
     )[:limit]:
         href = element.css('::attr(href)').extract_first()
         if href is None:
             # No target to link to.
             continue
         item = NewsRelease()
         item['title'] = element.css('::text').extract_first()
         item['link'] = "https://www.sba.gov" + href
         items.append(item)
     return items
예제 #16
0
 def parse(self, response):
     """Build NewsRelease items from the news-release rows in ``response``.

     Selects the ``div span a`` anchors inside each ``div.views-row`` of
     the ``div.view-news-releases-updated`` view, slices them with
     ``[:SCRAPE_LIMIT]`` (read from the spider settings) and returns a list
     of items. ``title`` is the first extracted text; ``link`` is the
     ``href`` prefixed with ``https://www.dhs.gov``.

     Anchors without an ``href`` are skipped; ``str + None`` would
     otherwise raise ``TypeError`` and abort the page.
     """
     limit = self.settings.attributes['SCRAPE_LIMIT'].value
     items = []
     for element in response.css(
             'div.view-news-releases-updated div.view-content div.views-row div span a'
     )[:limit]:
         href = element.css('::attr(href)').extract_first()
         if href is None:
             # No target to link to.
             continue
         item = NewsRelease()
         item['title'] = element.css('::text').extract_first()
         item['link'] = "https://www.dhs.gov" + href
         items.append(item)
     return items
예제 #17
0
 def parse(self, response):
     """Build NewsRelease items from the press-release list rows.

     Selects the display-title anchors of every ``tr.pr-list-page-row``,
     slices them with ``[:SCRAPE_LIMIT]`` (read from the spider settings)
     and returns a list of items. ``title`` is the first extracted text;
     ``link`` is the ``href`` prefixed with ``https://www.sec.gov``.

     Anchors without an ``href`` are skipped; ``str + None`` would
     otherwise raise ``TypeError`` and abort the page.
     """
     limit = self.settings.attributes['SCRAPE_LIMIT'].value
     items = []
     for element in response.css(
             'tr.pr-list-page-row td.views-field-field-display-title a'
     )[:limit]:
         href = element.css('::attr(href)').extract_first()
         if href is None:
             # No target to link to.
             continue
         item = NewsRelease()
         item['title'] = element.css('::text').extract_first()
         item['link'] = "https://www.sec.gov" + href
         items.append(item)
     return items
예제 #18
0
 def parse(self, response):
     """Build NewsRelease items from the ``span.field-content`` links.

     Selects ``span.field-content a`` anchors, slices them with
     ``[:SCRAPE_LIMIT]`` (read from the spider settings) and returns a list
     of items. ``title`` is the first extracted text; ``link`` is the
     ``href`` prefixed with ``https://www.justice.gov``.

     Anchors without an ``href`` are skipped; ``str + None`` would
     otherwise raise ``TypeError`` and abort the page.
     """
     limit = self.settings.attributes['SCRAPE_LIMIT'].value
     items = []
     for element in response.css('span.field-content a')[:limit]:
         href = element.css('::attr(href)').extract_first()
         if href is None:
             # No target to link to.
             continue
         item = NewsRelease()
         item['title'] = element.css('::text').extract_first()
         item['link'] = "https://www.justice.gov" + href
         items.append(item)
     return items
예제 #19
0
 def parse(self, response):
     """Build NewsRelease items from the topic list entries in ``response``.

     Iterates the ``ul.topic_list li`` entries, sliced with
     ``[:SCRAPE_LIMIT]`` (read from the spider settings). For each entry
     ``title`` is the first text of ``span.topicDescription`` and ``link``
     is the ``href`` of the ``span.summary a`` anchor prefixed with
     ``https://www.cia.gov``.

     Entries whose summary has no link are skipped; ``str + None`` would
     otherwise raise ``TypeError`` and abort the page.

     Returns:
         A list of NewsRelease items, in page order.
     """
     limit = self.settings.attributes['SCRAPE_LIMIT'].value
     items = []
     for element in response.css('ul.topic_list li')[:limit]:
         href = element.css('span.summary a::attr(href)').extract_first()
         if href is None:
             # List entry without a summary link; nothing to point at.
             continue
         item = NewsRelease()
         item['title'] = element.css(
             'span.topicDescription::text').extract_first()
         item['link'] = "https://www.cia.gov" + href
         items.append(item)
     return items
예제 #20
0
 def parse(self, response):
     """Build NewsRelease items from the tweets in a timeline page.

     Iterates the ``ol#stream-items-id li div.tweet`` elements, sliced with
     ``[:SCRAPE_LIMIT]`` (read from the spider settings). The ``title`` is
     assembled from the text nodes of the tweet text container, excluding
     text inside anchors whose class contains ``u-hidden`` and dropping
     any fragment that contains a newline. The ``link`` is the
     ``data-permalink-path`` attribute prefixed with
     ``https://www.twitter.com``.

     Tweets without a ``data-permalink-path`` are skipped; ``str + None``
     would otherwise raise ``TypeError`` and abort the page.

     Returns:
         A list of NewsRelease items, in page order.
     """
     limit = self.settings.attributes['SCRAPE_LIMIT'].value
     items = []
     for element in response.css('ol#stream-items-id li div.tweet')[:limit]:
         permalink = element.css('::attr(data-permalink-path)').extract_first()
         if permalink is None:
             # No permalink means there is no URL to attach to the tweet.
             continue
         text = element.xpath("div[@class='content']/div[@class='js-tweet-text-container']//text()[not(ancestor-or-self::a[contains(@class, 'u-hidden')])]").extract()
         item = NewsRelease()
         # Fragments containing a newline are dropped, not stripped.
         item['title'] = "".join(part for part in text if "\n" not in part)
         item['link'] = 'https://www.twitter.com' + permalink
         items.append(item)
     return items
예제 #21
0
 def parse(self, response):
     """Build NewsRelease items from the video title links in ``response``.

     Selects ``h3.yt-lockup-title a.yt-uix-tile-link`` anchors, sliced
     with ``[:SCRAPE_LIMIT]`` (read from the spider settings). ``title``
     is the first extracted text, prefixed with ``**TRUMP TV**`` when the
     response URL contains the channel id ``UCAql2DyGU2un1Ei2nMYsqOA``.
     ``link`` is the ``href`` prefixed with ``https://www.youtube.com``.

     Anchors without an ``href`` are skipped, and a missing title is left
     as ``None`` instead of being concatenated with the prefix. Both cases
     previously raised ``TypeError`` (``str + None``) and aborted the page.

     Returns:
         A list of NewsRelease items, in page order.
     """
     limit = self.settings.attributes['SCRAPE_LIMIT'].value
     # The channel check depends only on the response, so do it once.
     is_trump_tv = 'UCAql2DyGU2un1Ei2nMYsqOA' in response.url
     items = []
     for element in response.css(
             'h3.yt-lockup-title a.yt-uix-tile-link')[:limit]:
         href = element.css('::attr(href)').extract_first()
         if href is None:
             # No target to link to.
             continue
         title = element.css('::text').extract_first()
         if is_trump_tv and title is not None:
             title = '**TRUMP TV**' + title
         item = NewsRelease()
         item['title'] = title
         item['link'] = "https://www.youtube.com" + href
         items.append(item)
     return items