コード例 #1
0
ファイル: helsinki.py プロジェクト: jlehtoma/datavaalit
    # Some of the elements have multiple lines (with the email address
    # on the 2nd line). Take only the first line.
    s = el.text_content().split('\n')[0].strip()
    if not s:
        # The first line is empty after stripping; nothing to parse here.
        continue

    # On some occasions, the comma between the last and first name is
    # missing. Work around that by replacing the first space with a comma
    # whenever fewer than two commas are present.
    if s.count(',') < 2:
        s = s.replace(' ', ', ', 1)

    # Exactly three comma-separated fields are expected; any other count
    # makes this unpacking raise ValueError.
    last, first, party = s.split(',')
    # Clean up extra spaces
    first = first.strip()
    # Keep only what precedes the first period and the first space, then
    # drop any colons.
    party = party.strip().split('.')[0].split(' ')[0].replace(':', '')

    # Canonicalize the party abbreviation: 'peruss' (in any letter case) is
    # mapped to 'PS' before the prefix lookup below.
    if party.lower() == 'peruss':
        party = 'PS'
    # Use the first PARTIES entry that begins with the parsed abbreviation.
    # NOTE(review): an empty `party` would match the first entry, because
    # str.startswith('') is always true -- confirm that cannot happen.
    for p in PARTIES:
        if p.startswith(party):
            party = p
            break
    else:
        # for/else: reached only when the loop ended without a break,
        # i.e. no entry matched.
        raise Exception("Unknown party: %s" % party)

    # Store the member as a ("<first> <last>", party) tuple.
    members.append(('%s %s' % (first, last), party))

# Pass the collected members for "Helsinki" to submit_council_members.
submit_council_members("Helsinki", members)
コード例 #2
0
ファイル: helsinki.py プロジェクト: d2s/datavaalit
    # on the 2nd line. Take only the first line.
    s = el.text_content().split('\n')[0].strip()
    if not s:
        # The first line is empty after stripping; nothing to parse here.
        continue

    # On some occasions, the comma between the last and first name is
    # missing. Work around that by replacing the first space with a comma
    # whenever fewer than two commas are present.
    if s.count(',') < 2:
        s = s.replace(' ', ', ', 1)

    # Exactly three comma-separated fields are expected; any other count
    # makes this unpacking raise ValueError.
    last, first, party = s.split(',')
    # Clean up extra spaces
    first  = first.strip()
    # Keep only what precedes the first period and the first space, then
    # drop any colons.
    party = party.strip().split('.')[0].split(' ')[0].replace(':', '')

    # Canonicalize the party abbreviation: 'peruss' (in any letter case) is
    # mapped to 'PS' before the prefix lookup below.
    if party.lower() == 'peruss':
        party = 'PS'
    # Use the first PARTIES entry that begins with the parsed abbreviation.
    # NOTE(review): an empty `party` would match the first entry, because
    # str.startswith('') is always true -- confirm that cannot happen.
    for p in PARTIES:
        if p.startswith(party):
            party = p
            break
    else:
        # for/else: reached only when the loop ended without a break,
        # i.e. no entry matched.
        raise Exception("Unknown party: %s" % party)

    # Store the member as a ("<first> <last>", party) tuple.
    members.append(('%s %s' % (first, last), party))

# Pass the collected members for "Helsinki" to submit_council_members.
submit_council_members("Helsinki", members)

コード例 #3
0
ファイル: jyvaskyla.py プロジェクト: jlehtoma/datavaalit
        else:
            name = el.tail
        name = name.strip()
        members.append((name, party))
    return members

# Configure requests_cache under the name 'jyvaskyla' before any request
# is issued.
requests_cache.configure('jyvaskyla')

members = []
BASE_URL = 'http://www.jyvaskyla.fi/hallinto/valtuusto/valtuusto09'

r = requests.get(BASE_URL)
doc = html.fromstring(r.text)
# We will be fetching linked pages, so relative paths must be
# converted into absolute URLs.
doc.make_links_absolute(BASE_URL)

# Find the h2 element that contains the text "Valtuustoryhmät"
el = doc.xpath(u"//h2[contains(., 'Valtuustoryhmät')]")[0]
# The links to the council groups sit in the <p> siblings that follow it
party_links = el.xpath("following-sibling::p/a")
for link_el in party_links:
    url = link_el.attrib['href']
    # Each group page yields a list of (name, party) tuples.
    members.extend(scrape_council_group(url))

# The city has exactly 75 council members. Raise explicitly instead of
# using a bare `assert`, so the sanity check still runs under `python -O`
# and reports how many members were actually scraped.
if len(members) != 75:
    raise AssertionError(
        "Expected 75 council members, scraped %d" % len(members))

submit_council_members("Jyväskylä", members)