import datetime import time gasp_helper = scraperwiki.utils.swimport("gasp_helper") DEBUG = False if not DEBUG: gasp_helper = scraperwiki.utils.swimport("gasp_helper") else: import gasp_helper # This is a Python template for a Sunlight Labs Great American Scraper Project # http://services.sunlightlabs.com/gasp/legislators/ # Config gasp = gasp_helper.GaspHelper("MYAPIKEYHERE", "M000639") def _get_html_dom(url): html = scraperwiki.scrape(url) return lxml.html.fromstring(html) # Scrape contact info for offices def scrape_offices(): print "Scraping offices" root = _get_html_dom("http://menendez.senate.gov/services/assistance/") officenames = [ "dc", "barrington", "newark" ] # each div class has different name, so create an array of names
# Sunlight Labs Great American Scraper Project template
# http://services.sunlightlabs.com/gasp/legislators/
import scraperwiki
import lxml.html
import datetime
import time

DEBUG = False
if DEBUG:
    # Local development: use a local copy of the helper module.
    import gasp_helper
else:
    # Normal ScraperWiki run: pull the shared helper from another scraper.
    gasp_helper = scraperwiki.utils.swimport("gasp_helper")

# Step 1) PLACE YOUR CONFIG LINE HERE
# (it should look something like `gasp = gasp_helper.GaspHelper("your-api-key", "P000001")`)
# NOTE(review): this is a live API key committed in source -- consider rotating it.
gasp = gasp_helper.GaspHelper("8de2a31f83cf4546a61d9d5cc57e542e", "L000504")


# Step 2) Write Your Scraper Here
# (refer to https://scraperwiki.com/scrapers/gasp_helper for documentation)
def _get_html_dom(url):
    """Download *url* and return the parsed lxml HTML root element."""
    return lxml.html.fromstring(scraperwiki.scrape(url))


# Scrape contact info for offices -- this code is not yet working
#def scrape_offices():
#    print "Scraping offices"
#    root = _get_html_dom("http://lugar.senate.gov/contact/offices/")
#    # each div class has a different name, so keep an array of names
#    officenames = ["evansville","fortwayne","indy","valparaiso","dc"]
#    for oname in officenames:
import time DEBUG = False if not DEBUG: gasp_helper = scraperwiki.utils.swimport("gasp_helper") else: import gasp_helper # This is a Python template for a Sunlight Labs Great American Scraper Project # http://services.sunlightlabs.com/gasp/legislators/ # Config gasp = gasp_helper.GaspHelper("aacf0db461fe47179a15f3c5422100dc", "B001268") def _get_html_dom(url): html = scraperwiki.scrape(url) return lxml.html.fromstring(html) # Scrape contact info for offices def scrape_offices(): print "Scraping offices" root = _get_html_dom("http://www.scottbrown.senate.gov/public/index.cfm/officeinformation/") officenames = ["dc","boston","plymouth","springfield","worcester"] # each div class has different name, so create an array of names for oname in officenames: for tr in root.cssselect("div[class='"+oname+"'] div[class='content']"): # finds divs with class attributes equalling oname, finds content inside (which is the same across divs) print tr
import time DEBUG = False if not DEBUG: gasp_helper = scraperwiki.utils.swimport("gasp_helper") else: import gasp_helper # This is a Python template for a Sunlight Labs Great American Scraper Project # http://services.sunlightlabs.com/gasp/legislators/ # Config gasp = gasp_helper.GaspHelper("721dab355e47475da32b69d30b16c9cb", "G000570") def _get_html_dom(url): html = scraperwiki.scrape(url) return lxml.html.fromstring(html) def scrape_offices(): print "Scraping Offices" root = _get_html_dom("https://guinta.house.gov/contact-me") offices = root.cssselect("div#office-locations-wrapper") addresses = offices[0].text_content() print addresses def scrape_biography(): print "Scraping bio" root = _get_html_dom("http://guinta.house.gov/about-me/full-biography")
import scraperwiki

try:
    swutils = scraperwiki.utils
except AttributeError:
    # Not running on ScraperWiki (no scraperwiki.utils): fall back to a
    # local copy of the helper module.
    import gasp_helper
else:
    # FIX: the original executed `swutils.swimport("gasp_helper")` without
    # binding its return value, so the name `gasp_helper` was never defined
    # on this path and the GaspHelper call below raised NameError. swimport
    # returns the imported module; assign it.
    gasp_helper = swutils.swimport("gasp_helper")

API_KEY = ''  # sunlight api key here
BIOGUIDE_ID = ''  # bioguide id here

gasp = gasp_helper.GaspHelper(API_KEY, BIOGUIDE_ID)
import lxml.html import datetime import time DEBUG = False if not DEBUG: gasp_helper = scraperwiki.utils.swimport("gasp_helper") else: import gasp_helper # This is a Python template for a Sunlight Labs Great American Scraper Project # http://services.sunlightlabs.com/gasp/legislators/ # Config gasp = gasp_helper.GaspHelper("ab4686629daa4f80b6b1559eaa7eee4b", "L000554") def _get_html_dom(url): html = scraperwiki.scrape(url) return lxml.html.fromstring(html) # Scrape contact info for offices def scrape_offices(): print "Scraping offices" root = _get_html_dom( "http://lobiondo.house.gov/office/mays-landing-office") for office in range(2): addr_text = root.cssselect("div.address")[office].text_content() city_text = root.cssselect("div.city")[office].text_content()