Example #1
import scraperwiki
import lxml.html
import datetime
import time

DEBUG = False

if not DEBUG:
    gasp_helper = scraperwiki.utils.swimport("gasp_helper")
else:
    import gasp_helper

# This is a Python template for a Sunlight Labs Great American Scraper Project
# http://services.sunlightlabs.com/gasp/legislators/

# Config
gasp = gasp_helper.GaspHelper("MYAPIKEYHERE", "M000639")


def _get_html_dom(url):
    html = scraperwiki.scrape(url)
    return lxml.html.fromstring(html)


# Scrape contact info for offices
def scrape_offices():
    print "Scraping offices"
    root = _get_html_dom("http://menendez.senate.gov/services/assistance/")

    officenames = [
        "dc", "barrington", "newark"
    ]  # each div class has different name, so create an array of names
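    # The listing is truncated here; a minimal sketch of the loop body,
    # following the div-per-office selector pattern used in Example #3
    # (the 'content' class name is an assumption about this site's markup,
    # not something confirmed by the excerpt):
    for oname in officenames:
        for div in root.cssselect("div[class='" + oname + "'] div[class='content']"):
            print div.text_content()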
Example #2
# http://services.sunlightlabs.com/gasp/legislators/

import scraperwiki
import lxml.html
import datetime
import time

DEBUG = False

if not DEBUG:
    gasp_helper = scraperwiki.utils.swimport("gasp_helper")
else:
    import gasp_helper
# Step 1) PLACE YOUR CONFIG LINE HERE
#         (it should look something like `gasp = gasp_helper.GaspHelper("your-api-key", "P000001")`)
gasp = gasp_helper.GaspHelper("8de2a31f83cf4546a61d9d5cc57e542e", "L000504")


# Step 2) Write Your Scraper Here
#         (refer to https://scraperwiki.com/scrapers/gasp_helper for documentation)
def _get_html_dom(url):
    html = scraperwiki.scrape(url)
    return lxml.html.fromstring(html)


# Scrape contact info for offices -- this code is not yet working
#def scrape_offices():
#    print "Scraping offices"
#    root = _get_html_dom("http://lugar.senate.gov/contact/offices/")
#    officenames = ["evansville","fortwayne","indy","valparaiso","dc"]  # each div class has different name, so create an array of names
#    for oname in officenames:
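# A minimal sketch of how the commented-out function above might be finished,
# modeled on the selector pattern in Example #3; the page structure and the
# per-office div class names are unverified assumptions:
def scrape_offices():
    print "Scraping offices"
    root = _get_html_dom("http://lugar.senate.gov/contact/offices/")
    officenames = ["evansville", "fortwayne", "indy", "valparaiso", "dc"]
    for oname in officenames:
        for div in root.cssselect("div[class='" + oname + "']"):
            print div.text_content()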
Example #3
import scraperwiki
import lxml.html
import time

DEBUG = False

if not DEBUG:
    gasp_helper = scraperwiki.utils.swimport("gasp_helper")
else:
    import gasp_helper


# This is a Python template for a Sunlight Labs Great American Scraper Project 
# http://services.sunlightlabs.com/gasp/legislators/


# Config
gasp = gasp_helper.GaspHelper("aacf0db461fe47179a15f3c5422100dc", "B001268")

def _get_html_dom(url):
    html = scraperwiki.scrape(url)
    return lxml.html.fromstring(html)


# Scrape contact info for offices
def scrape_offices():
    print "Scraping offices"
    root = _get_html_dom("http://www.scottbrown.senate.gov/public/index.cfm/officeinformation/")

    officenames = ["dc","boston","plymouth","springfield","worcester"]  # each div class has different name, so create an array of names
    for oname in officenames:
        for tr in root.cssselect("div[class='"+oname+"'] div[class='content']"):   # finds divs with class attributes equalling oname, finds content inside (which is the same across divs)
            print tr
Example #4
import scraperwiki
import lxml.html
import time

DEBUG = False

if not DEBUG:
    gasp_helper = scraperwiki.utils.swimport("gasp_helper")
else:
    import gasp_helper


# This is a Python template for a Sunlight Labs Great American Scraper Project
# http://services.sunlightlabs.com/gasp/legislators/


# Config
gasp = gasp_helper.GaspHelper("721dab355e47475da32b69d30b16c9cb", "G000570")

def _get_html_dom(url):
    html = scraperwiki.scrape(url)
    return lxml.html.fromstring(html)
    
def scrape_offices():
    print "Scraping Offices"
    root = _get_html_dom("https://guinta.house.gov/contact-me")
    offices = root.cssselect("div#office-locations-wrapper")
    addresses = offices[0].text_content()
    print addresses

def scrape_biography():
    print "Scraping bio"
    root = _get_html_dom("http://guinta.house.gov/about-me/full-biography")
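    # Truncated here in the original; a minimal sketch of pulling the page
    # text -- no specific selector for the bio is confirmed by the excerpt,
    # so this just collects all paragraph text as an illustration:
    bio = "\n".join(p.text_content() for p in root.cssselect("p"))
    print bio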
Example #5
import scraperwiki

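# On ScraperWiki, scraperwiki.utils.swimport pulls in the shared gasp_helper
# scraper; when running locally, scraperwiki.utils is missing, so the
# AttributeError branch falls back to a plain import of a local gasp_helper module.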
try:
    swutils = scraperwiki.utils
except AttributeError:
    import gasp_helper
else:
    gasp_helper = swutils.swimport("gasp_helper")

API_KEY = ''  # sunlight api key here
BIOGUIDE_ID = ''  # bioguide id here

gasp = gasp_helper.GaspHelper(API_KEY, BIOGUIDE_ID)
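# Step 2 of the template (see Example #2) would go below this line: define
# scraper functions such as scrape_offices() using the gasp object configured above.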
Example #6
import scraperwiki
import lxml.html
import datetime
import time

DEBUG = False

if not DEBUG:
    gasp_helper = scraperwiki.utils.swimport("gasp_helper")
else:
    import gasp_helper

# This is a Python template for a Sunlight Labs Great American Scraper Project
# http://services.sunlightlabs.com/gasp/legislators/

# Config
gasp = gasp_helper.GaspHelper("ab4686629daa4f80b6b1559eaa7eee4b", "L000554")


def _get_html_dom(url):
    html = scraperwiki.scrape(url)
    return lxml.html.fromstring(html)


# Scrape contact info for offices
def scrape_offices():
    print "Scraping offices"
    root = _get_html_dom(
        "http://lobiondo.house.gov/office/mays-landing-office")
    for office in range(2):
        addr_text = root.cssselect("div.address")[office].text_content()
        city_text = root.cssselect("div.city")[office].text_content()
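        # The listing is truncated here; a minimal sketch of finishing the
        # loop -- gasp_helper's storage API is not shown in this excerpt, so
        # the scraped fields are printed rather than saved through an assumed method:
        print addr_text
        print city_text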