def parse(self, response):
    """Load the page's content and title into an item.

    Both fields are picked with CSS selectors. The content is the div with
    the fixed id below; the title is the text matched by
    ``h1.headline span span::text`` inside that same div.
    """
    content_css = 'div#cceb7e72db-b12d-4887-8946-387a18f7f068'
    title_css = (
        'div#cceb7e72db-b12d-4887-8946-387a18f7f068 h1.headline span span::text'
    )

    item_loader = ContentLoader(response=response)
    item_loader.add_css('content', content_css)
    item_loader.add_css('title', title_css)
    return item_loader.load_item()
def parse(self, response):
    """Load the page's content and title into an item using XPath.

    The content is the ``col2`` cell in the first row of the ``content``
    table; the title is the text of the first ``font`` element found
    anywhere under that cell.
    """
    content_xpath = '//table[@class="content"]/tr[1]/td[@class="col2"]'
    title_xpath = (
        '//table[@class="content"]/tr[1]/td[@class="col2"]//font[1]/text()'
    )

    item_loader = ContentLoader(response=response)
    item_loader.add_xpath('content', content_xpath)
    item_loader.add_xpath('title', title_xpath)
    return item_loader.load_item()
def parse(self, response):
    """Load the page's content and title into an item.

    This callback doubles as a template: only the two CSS selectors are
    meant to change from one spider to the next.
    """
    # Boilerplate: the loader is always built from the response like this.
    item_loader = ContentLoader(response=response)

    # Two fields are scraped, content and title. Each one gets the CSS
    # selector that matches it on the target page(s).
    content_css = 'div#bigContentContainer'
    title_css = 'div#bigContentContainer h1'
    item_loader.add_css('content', content_css)
    item_loader.add_css('title', title_css)

    # Boilerplate: always finish by returning loader.load_item().
    return item_loader.load_item()
def parse_post(self, response):
    """Load a post's title and content into an item via CSS selectors."""
    # (field, selector) pairs, applied in this order.
    field_selectors = (
        ('title', 'div.heading h2::text'),
        ('content', 'div.article'),
    )

    item_loader = ContentLoader(response=response)
    for field_name, css in field_selectors:
        item_loader.add_css(field_name, css)
    return item_loader.load_item()
def parse_post(self, response):
    """Load a post's content and title into an item.

    The content is located with XPath (the second ``table`` among its
    siblings), while the title is located with a CSS selector.
    """
    content_xpath = '//table[2]'
    title_css = 'span.contTitle2'

    item_loader = ContentLoader(response=response)
    item_loader.add_xpath('content', content_xpath)
    item_loader.add_css('title', title_css)
    return item_loader.load_item()
def parse_post(self, response):
    """Load a post's content and title into an item.

    The content is the main content wrapper div. The title selector is
    added through a nested loader scoped to that same wrapper, so it only
    matches headings (h1-h4) inside it.
    """
    wrapper_css = 'div.grve-main-content-wrapper'
    heading_css = 'h1, h2, h3, h4'

    item_loader = ContentLoader(response=response)
    item_loader.add_css('content', wrapper_css)

    # Restrict the title lookup to the wrapper element.
    wrapper_loader = item_loader.nested_css(wrapper_css)
    wrapper_loader.add_css('title', heading_css)

    # The item is loaded from the outer loader, not the nested one.
    return item_loader.load_item()
def parse(self, response):
    """Load the page's title and content into an item via CSS selectors."""
    # (field, selector) pairs, applied in this order.
    field_selectors = (
        ('title', 'article header h2::text'),
        ('content', 'article div.mura-region-local'),
    )

    item_loader = ContentLoader(response=response)
    for field_name, css in field_selectors:
        item_loader.add_css(field_name, css)
    return item_loader.load_item()
def parse(self, response):
    """Load the page's content and title into an item.

    The content is the article container div; the title is any ``h1``
    inside that container.
    """
    content_css = 'div#scc-portal-article-content-container'
    title_css = 'div#scc-portal-article-content-container h1'

    item_loader = ContentLoader(response=response)
    item_loader.add_css('content', content_css)
    item_loader.add_css('title', title_css)
    return item_loader.load_item()
def parse(self, response):
    """Load the page's content and title into an item.

    The content is the ``main#PAGES_CONTAINER`` element; the title is the
    text of the document's ``title`` element.
    """
    content_css = 'main#PAGES_CONTAINER'
    title_css = 'title::text'

    item_loader = ContentLoader(response=response)
    item_loader.add_css('content', content_css)
    item_loader.add_css('title', title_css)
    return item_loader.load_item()
def parse(self, response):
    """Load the page's content and title into an item via CSS selectors."""
    # (field, selector) pairs, applied in this order.
    field_selectors = (
        ('content', 'main.page-content'),
        ('title', 'main.page-content h1'),
    )

    item_loader = ContentLoader(response=response)
    for field_name, css in field_selectors:
        item_loader.add_css(field_name, css)
    return item_loader.load_item()
def parse_post(self, response):
    """Load a post's content and title into an item.

    The content is matched by ``div.content``; the title is the text of an
    ``h3`` inside ``div.item``.
    """
    content_css = 'div.content'
    title_css = 'div.item h3::text'

    item_loader = ContentLoader(response=response)
    item_loader.add_css('content', content_css)
    item_loader.add_css('title', title_css)
    return item_loader.load_item()
def parse(self, response):
    """Load the page's content and title into an item.

    One callback serves two sites, so the content selector lists both
    page layouts.
    """
    # sfdph.org pages use 'div#main-body'; sfcdcp.org pages use 'article'.
    content_css = 'div#main-body, article'
    title_css = 'h1'

    item_loader = ContentLoader(response=response)
    item_loader.add_css('content', content_css)
    item_loader.add_css('title', title_css)
    return item_loader.load_item()
def parse_post(self, response):
    """Load a post's content and title into an item via CSS selectors."""
    # (field, selector) pairs, applied in this order.
    field_selectors = (
        ('content', 'article'),
        ('title', 'h1'),
    )

    item_loader = ContentLoader(response=response)
    for field_name, css in field_selectors:
        item_loader.add_css(field_name, css)
    return item_loader.load_item()
def parse_post(self, response):
    """Load a post's content and title into an item.

    The content is matched by ``div.vpadcontentdiv``; the title is the
    text of an ``h2`` inside ``div#newsdetails``.
    """
    content_css = 'div.vpadcontentdiv'
    title_css = 'div#newsdetails h2::text'

    item_loader = ContentLoader(response=response)
    item_loader.add_css('content', content_css)
    item_loader.add_css('title', title_css)
    return item_loader.load_item()
def parse(self, response):
    """Load the page's title and content into an item.

    The title is not scraped: a fixed string is supplied as its value.
    Only the content comes from the page, via a CSS selector.
    """
    fixed_title = 'Sacramento Public Health'
    content_css = 'div.news'

    item_loader = ContentLoader(response=response)
    item_loader.add_value('title', fixed_title)
    item_loader.add_css('content', content_css)
    return item_loader.load_item()
def parse_post(self, response):
    """Load a post's content and title into an item via CSS selectors."""
    # (field, selector) pairs, applied in this order.
    field_selectors = (
        ('content', 'article#content_begins'),
        ('title', 'article#content_begins h2::text'),
    )

    item_loader = ContentLoader(response=response)
    for field_name, css in field_selectors:
        item_loader.add_css(field_name, css)
    return item_loader.load_item()
def parse(self, response):
    """Load the page's title and content into an item.

    The title is an ``h3`` inside ``div.content``; the content is
    ``div.content`` itself. The title selector is added first.
    """
    title_css = 'div.content h3'
    content_css = 'div.content'

    item_loader = ContentLoader(response=response)
    item_loader.add_css('title', title_css)
    item_loader.add_css('content', content_css)
    return item_loader.load_item()
def parse(self, response):
    """Load the page's content and title into an item via CSS selectors."""
    # (field, selector) pairs, applied in this order.
    field_selectors = (
        ('content', 'div#main'),
        ('title', 'h1#page-title'),
    )

    item_loader = ContentLoader(response=response)
    for field_name, css in field_selectors:
        item_loader.add_css(field_name, css)
    return item_loader.load_item()