コード例 #1
0
ファイル: AKLeg.py プロジェクト: rsimoes/govBot
def getAKLeg():
    """Scrape the Alaska legislature sites and return one dict per member.

    Each returned dict has the keys 'Name', 'Website', 'District', 'Party',
    'Phone', 'Email' and 'Address'. Every record is also printed as it is
    built.

    One roster page is fetched per chamber, plus one page per member (the
    'Website' link) to read the session address.

    Raises AttributeError or IndexError when the expected markup or a
    labelled field is missing from a page.
    """
    def first_group(pattern, text):
        # Return capture group 1 of ``pattern`` in ``text``; fails with
        # AttributeError when the pattern is absent.
        return re.search(pattern, text, re.DOTALL).group(1)

    def fetch_members(body):
        # The chamber name selects the subdomain. Previously the house URL
        # was hard-coded, so the house roster was scraped twice and its
        # second copy labelled as the senate.
        # NOTE(review): assumes the senate site uses the same markup as the
        # house site (same 'tab1-2' / 'people-holder' structure) -- confirm.
        roster = BeautifulSoup(
            urlopen('http://{0}.legis.state.ak.us/'.format(body)).read()
        )
        return roster.find(
            'div', {'id': 'tab1-2'}
        ).find(
            'ul', {'class': 'people-holder'}
        ).find(
            'ul', {'class': 'item'}
        ).find_all('li')

    # Fetch both rosters up front, before any member page is requested.
    chambers = [
        (body, fetch_members(body.lower())) for body in ('House', 'Senate')
    ]

    dictList = []

    for body, table in chambers:
        for item in table:
            repInfo = {}
            repInfo['Name'] = unidecode(
                item.find('strong', {'class': 'name'}).string
            ).strip()

            # The first link in the list item is used as the member page.
            repInfo['Website'] = item.find('a').get('href')

            # The <dl> holds the labelled "District:", "Party:" and
            # "Phone:" fields; extract its text once and search it.
            dl = item.find('dl')
            details = dl.get_text()

            repInfo['District'] = 'AK State {0} District {1}'.format(
                body, first_group(r'District:\s*(\w+)', details)
            )
            repInfo['Party'] = first_group(r'Party:\s*(\w+)', details)
            repInfo['Phone'] = first_group(r'Phone:\s*([0-9-]+)', details)

            repInfo['Email'] = dl.find('a').get('href').replace('mailto:', '')

            # The address comes from the member's own page: the text after
            # "Session Contact" up to and including "99801", taken from the
            # second 'bioleft' div.
            member_soup = BeautifulSoup(urlopen(repInfo['Website']).read())
            repInfo['Address'] = multiline_strip(
                first_group(
                    r'Session Contact(.+99801)',
                    member_soup.find_all(
                        'div', {'class': 'bioleft'}
                    )[1].get_text()
                )
            )

            # Parenthesised single argument: same output under the Python 2
            # print statement, and also valid as a Python 3 call.
            print(str(repInfo) + '\n')
            dictList.append(repInfo)

    return dictList
コード例 #2
0
ファイル: TXLeg.py プロジェクト: rsimoes/govBot
def get_house_rep(soup):
    """Extract one Texas House member's details from a parsed member page.

    ``soup`` is searched for a ``<div class="member-info">``; all fields are
    read from that element.

    Returns a dict with the keys 'District', 'Name', 'Phone' and 'Address'.
    'Name' is None when the heading does not match the expected
    "Rep. Last, First" form.

    Raises AttributeError when the member-info div, its <h2>, or the
    district, phone or address pattern cannot be found.
    """
    member_info = soup.find('div', {'class': 'member-info'})
    # Serialise the element once; the district, phone and address patterns
    # are all searched in this same markup string.
    member_html = str(member_info)

    number = re.search(r'District (\d+)', member_html).group(1)
    district = 'TX State House District %s' % number

    # TX House member names are in "Last, First" format:
    def rewrite_name(string):
        # Raw string with escaped periods so "Rep." and "Jr." match
        # literally. The lookahead keeps a ", Jr." suffix attached to the
        # last name instead of treating that comma as the separator.
        search = re.search(r'Rep\. (.+?)(?:, (?!Jr\.))(.+)', string)
        if search is None:
            return None

        first, last = search.group(2).strip(), search.group(1).strip()
        return unidecode(first + ' ' + last).strip()

    name = rewrite_name(member_info.find('h2').get_text())

    # First "(ddd) ddd-dddd" sequence found in the markup.
    phone = re.search(
        r'\([0-9]{3}\)\s[0-9]{3}-[0-9]{4}',
        member_html
    ).group()

    # Everything after "Capitol Address:" up to and including the first
    # five-digit sequence beginning with 787.
    address = multiline_strip(
        re.search(
            r'Capitol Address:(.+?787\d{2})',
            member_html,
            re.DOTALL
        ).group(1)
    )

    return {
        'District': district,
        'Name':     name,
        'Phone':    phone,
        'Address':  address
    }