Exemple #1
0
 def parse(self, response):
     """Load the 'content' and 'title' fields from ``response``.

     Both fields are filled through CSS selectors rooted at the same
     ``div`` id; the loaded item is returned.
     """
     container_css = 'div#cceb7e72db-b12d-4887-8946-387a18f7f068'
     headline_css = (
         'div#cceb7e72db-b12d-4887-8946-387a18f7f068 h1.headline span span::text'
     )

     item_loader = ContentLoader(response=response)
     item_loader.add_css('content', container_css)
     item_loader.add_css('title', headline_css)
     return item_loader.load_item()
 def parse(self, response):
     """Load the 'content' and 'title' fields from ``response`` via XPath.

     'content' is taken from the ``col2`` cell in the first row of the
     ``content`` table; 'title' from the text of the first ``font``
     element below that cell. Returns the loaded item.
     """
     cell_xpath = '//table[@class="content"]/tr[1]/td[@class="col2"]'
     heading_xpath = (
         '//table[@class="content"]/tr[1]/td[@class="col2"]//font[1]/text()'
     )

     item_loader = ContentLoader(response=response)
     item_loader.add_xpath('content', cell_xpath)
     item_loader.add_xpath('title', heading_xpath)
     return item_loader.load_item()
Exemple #3
0
 def parse(self, response):
     """Load the 'content' and 'title' fields from ``response``.

     Template-style callback: only the two CSS selectors below are
     page-specific; building the loader and returning ``load_item()``
     are boilerplate.
     """
     # Page-specific part: CSS selectors locating the content and the title.
     content_css = 'div#bigContentContainer'
     title_css = 'div#bigContentContainer h1'

     # Boilerplate: build the loader, register both fields, return the item.
     item_loader = ContentLoader(response=response)
     item_loader.add_css('content', content_css)
     item_loader.add_css('title', title_css)
     return item_loader.load_item()
 def parse_post(self, response):
     """Load the 'title' and 'content' fields of a post and return the item."""
     field_selectors = (
         ('title', 'div.heading h2::text'),
         ('content', 'div.article'),
     )
     item_loader = ContentLoader(response=response)
     # Register the fields in the listed order (title first, then content).
     for field_name, css in field_selectors:
         item_loader.add_css(field_name, css)
     return item_loader.load_item()
 def parse_post(self, response):
     """Load a post's 'content' (XPath) and 'title' (CSS) and return the item."""
     content_xpath = '//table[2]'
     title_css = 'span.contTitle2'

     item_loader = ContentLoader(response=response)
     item_loader.add_xpath('content', content_xpath)
     item_loader.add_css('title', title_css)
     return item_loader.load_item()
 def parse_post(self, response):
     """Load a post's 'content' and 'title' and return the item.

     'content' is the main content wrapper; 'title' is added through a
     nested loader created from that same wrapper selector, using the
     ``h1``-``h4`` selector group.
     """
     wrapper_css = 'div.grve-main-content-wrapper'

     item_loader = ContentLoader(response=response)
     item_loader.add_css('content', wrapper_css)
     # The title is registered on a nested loader built from the wrapper selector.
     item_loader.nested_css(wrapper_css).add_css('title', 'h1, h2, h3, h4')
     return item_loader.load_item()
Exemple #7
0
 def parse(self, response):
     """Load the 'title' and 'content' fields from ``response``; return the item."""
     title_css = 'article header h2::text'
     body_css = 'article div.mura-region-local'

     item_loader = ContentLoader(response=response)
     item_loader.add_css('title', title_css)
     item_loader.add_css('content', body_css)
     return item_loader.load_item()
Exemple #8
0
 def parse(self, response):
     """Load the 'content' and 'title' fields from ``response``; return the item."""
     field_selectors = (
         ('content', 'div#scc-portal-article-content-container'),
         ('title', 'div#scc-portal-article-content-container h1'),
     )
     item_loader = ContentLoader(response=response)
     # Register the fields in the listed order (content first, then title).
     for field_name, css in field_selectors:
         item_loader.add_css(field_name, css)
     return item_loader.load_item()
Exemple #9
0
 def parse(self, response):
     """Load the 'content' and 'title' fields from ``response``; return the item.

     'content' uses the ``main#PAGES_CONTAINER`` selector; 'title' uses
     the ``title::text`` selector.
     """
     container_css = 'main#PAGES_CONTAINER'
     page_title_css = 'title::text'

     item_loader = ContentLoader(response=response)
     item_loader.add_css('content', container_css)
     item_loader.add_css('title', page_title_css)
     return item_loader.load_item()
 def parse(self, response):
     """Load the 'content' and 'title' fields from ``response``; return the item."""
     main_css = 'main.page-content'
     heading_css = 'main.page-content h1'

     item_loader = ContentLoader(response=response)
     item_loader.add_css('content', main_css)
     item_loader.add_css('title', heading_css)
     return item_loader.load_item()
Exemple #11
0
 def parse_post(self, response):
     """Load a post's 'content' and 'title' fields and return the item."""
     field_selectors = (
         ('content', 'div.content'),
         ('title', 'div.item h3::text'),
     )
     item_loader = ContentLoader(response=response)
     # Register the fields in the listed order (content first, then title).
     for field_name, css in field_selectors:
         item_loader.add_css(field_name, css)
     return item_loader.load_item()
 def parse(self, response):
     """Load the 'content' and 'title' fields from ``response``; return the item.

     One selector group serves two sites for the content field.
     """
     # 'div#main-body' is for sfdph.org, 'article' is for sfcdcp.org.
     content_css = 'div#main-body, article'
     title_css = 'h1'

     item_loader = ContentLoader(response=response)
     item_loader.add_css('content', content_css)
     item_loader.add_css('title', title_css)
     return item_loader.load_item()
 def parse_post(self, response):
     """Load a post's 'content' (``article``) and 'title' (``h1``); return the item."""
     article_css = 'article'
     heading_css = 'h1'

     item_loader = ContentLoader(response=response)
     item_loader.add_css('content', article_css)
     item_loader.add_css('title', heading_css)
     return item_loader.load_item()
 def parse_post(self, response):
     """Load a post's 'content' and 'title' fields and return the item."""
     field_selectors = (
         ('content', 'div.vpadcontentdiv'),
         ('title', 'div#newsdetails h2::text'),
     )
     item_loader = ContentLoader(response=response)
     # Register the fields in the listed order (content first, then title).
     for field_name, css in field_selectors:
         item_loader.add_css(field_name, css)
     return item_loader.load_item()
 def parse(self, response):
     """Load a fixed 'title' and the page's 'content'; return the item.

     The title is a constant supplied with ``add_value`` rather than
     selected from the page; the content comes from ``div.news``.
     """
     fixed_title = 'Sacramento Public Health'
     news_css = 'div.news'

     item_loader = ContentLoader(response=response)
     item_loader.add_value('title', fixed_title)
     item_loader.add_css('content', news_css)
     return item_loader.load_item()
Exemple #16
0
 def parse_post(self, response):
     """Load a post's 'content' and 'title' fields and return the item."""
     article_css = 'article#content_begins'
     heading_css = 'article#content_begins h2::text'

     item_loader = ContentLoader(response=response)
     item_loader.add_css('content', article_css)
     item_loader.add_css('title', heading_css)
     return item_loader.load_item()
 def parse(self, response):
     """Load the 'title' and 'content' fields from ``response``; return the item."""
     field_selectors = (
         ('title', 'div.content h3'),
         ('content', 'div.content'),
     )
     item_loader = ContentLoader(response=response)
     # Register the fields in the listed order (title first, then content).
     for field_name, css in field_selectors:
         item_loader.add_css(field_name, css)
     return item_loader.load_item()
 def parse(self, response):
     """Load the 'content' and 'title' fields from ``response``; return the item."""
     main_css = 'div#main'
     page_title_css = 'h1#page-title'

     item_loader = ContentLoader(response=response)
     item_loader.add_css('content', main_css)
     item_loader.add_css('title', page_title_css)
     return item_loader.load_item()