def scrape_old_vote(self, url):
    """Scrape a roll-call vote from a page that uses the older layout.

    The text of the page's first ``<h3>`` is split on ``', '``: the second
    field names the chamber (anything not starting with ``House`` is treated
    as the upper chamber) and every field after it is re-joined to form the
    motion.  Totals are read from the second ``<table>`` on the page and
    the individual member votes from the first.

    Args:
        url: Address of the vote page; it is also recorded as the vote's
            source.

    Returns:
        The populated ``Vote``.  The vote's date argument is passed as
        ``None``; ``passed`` is a simple yes > no comparison.
    """
    vote_page = self.soup_parser(self.urlopen(url))

    header_fields = vote_page.h3.contents[0].split(', ')
    chamber_name = header_fields[1]
    chamber = 'lower' if chamber_name.startswith('House') else 'upper'

    # Everything after the first word of the chamber name is kept as the
    # location, except for the plain "House of Representatives" case.
    location = ' '.join(chamber_name.split(' ')[1:])
    if location.startswith('of Representatives'):
        location = ''

    motion = ', '.join(header_fields[2:])

    def get_count(cell):
        # An empty cell counts as zero votes.
        if not cell.contents:
            return 0
        return int(cell.contents[0])

    # The tallies sit in the odd-numbered cells of the results table;
    # look the cells up once instead of once per tally.
    result_cells = vote_page.findAll('table')[1].findAll('td')
    yes_count = get_count(result_cells[1])
    no_count = get_count(result_cells[3])
    excused_count = get_count(result_cells[5])
    absent_count = get_count(result_cells[7])
    other_count = excused_count + absent_count

    passed = yes_count > no_count

    vote = Vote(chamber, None, motion, passed,
                yes_count, no_count, other_count,
                excused_count=excused_count,
                absent_count=absent_count,
                location=location)
    vote.add_source(url)

    vote_tbl = vote_page.table
    for td in vote_tbl.findAll('td'):
        # Empty cells carry no vote; skip them rather than fail on
        # contents[0].
        if not td.contents:
            continue

        value = td.contents[0]
        # The element preceding a vote cell holds the member's name.
        if value == 'Yea':
            vote.yes(td.findPrevious().contents[0])
        elif value == 'Nay':
            vote.no(td.findPrevious().contents[0])
        elif value in ['Excused', 'Absent']:
            vote.other(td.findPrevious().contents[0])

    return vote
def scrape_new_vote(self, url):
    """Scrape a roll-call vote from a page that uses the newer layout.

    The header text is taken from the element with id
    ``ctl00_contentMain_hdVote`` and split on ``', '``: the second field
    names the chamber (anything not starting with ``House`` is treated as
    the upper chamber) and every field after it is re-joined to form the
    motion.  Totals come from the ``ctl00_contentMain_td*`` elements and
    individual member votes from ``ctl00_contentMain_tblVotes``.

    Args:
        url: Address of the vote page; it is also recorded as the vote's
            source.

    Returns:
        The populated ``Vote``.  The vote's date argument is passed as
        ``None``; ``passed`` is a simple yes > no comparison.
    """
    vote_page = self.soup_parser(self.urlopen(url))

    header = vote_page.find(id="ctl00_contentMain_hdVote").contents[0]
    header_fields = header.split(', ')
    chamber_name = header_fields[1]
    chamber = 'lower' if chamber_name.startswith('House') else 'upper'

    # Everything after the first word of the chamber name is kept as the
    # location, except for the plain "House of Representatives" case.
    location = ' '.join(chamber_name.split(' ')[1:])
    if location.startswith('of Representatives'):
        location = ''

    motion = ', '.join(header_fields[2:])

    def get_count(element_id):
        # An empty tally cell counts as zero votes instead of raising
        # IndexError on contents[0].
        cell = vote_page.find(id=element_id)
        if not cell.contents:
            return 0
        return int(cell.contents[0])

    yes_count = get_count("ctl00_contentMain_tdAyes")
    no_count = get_count("ctl00_contentMain_tdNays")
    excused_count = get_count("ctl00_contentMain_tdExcused")
    absent_count = get_count("ctl00_contentMain_tdAbsent")
    other_count = excused_count + absent_count

    passed = yes_count > no_count

    vote = Vote(chamber, None, motion, passed,
                yes_count, no_count, other_count,
                excused_count=excused_count,
                absent_count=absent_count,
                location=location)
    vote.add_source(url)

    vote_tbl = vote_page.find(id="ctl00_contentMain_tblVotes")
    for td in vote_tbl.findAll('td'):
        # Empty cells carry no vote; skip them rather than fail on
        # contents[0].
        if not td.contents:
            continue

        value = td.contents[0]
        # The element preceding a vote cell holds the member's name.
        if value == 'Yea':
            vote.yes(td.findPrevious().contents[0])
        elif value == 'Nay':
            vote.no(td.findPrevious().contents[0])
        elif value in ['Excused', 'Absent']:
            vote.other(td.findPrevious().contents[0])

    return vote
def scrape_bill(self, chamber, current_bill, session):
    """Scrape one bill with its sponsors, documents, votes and actions.

    Fetches the bill's status page, extracts the numeric id embedded in a
    ``BTN<digits>`` token, then uses that id to fetch the sponsor page and
    the history page.  Each history row may contribute an amendment
    document, a roll-call vote, a tally-only vote and an action.  The
    finished bill is handed to ``self.save_bill``.

    Args:
        chamber: Chamber value passed through to ``Bill``, ``Vote`` and
            ``add_action``.
        current_bill: Bill number placed in the status URL and used as the
            bill's identifier.
        session: Indexable pair; ``session[0]`` goes into the status URL
            and ``session[1]`` is passed to ``Bill``.

    Raises:
        Exception: If the status page reports an expired ACAS session, or
            if no ``BTN<digits>`` token is found on it.
    """
    status_url = (
        "http://alisondb.legislature.state.al.us/acas/SESSBillsStatusResultsMac.asp?BillNumber=%s&GetStatus=Get+Status&session=%s"
        % (current_bill, session[0])
    )
    with self.soup_context(status_url) as status_page:
        status_html = str(status_page)
        if "Your ACAS Session has expired." in status_html:
            raise Exception("Expired cookie - you'll have to run with -n to skip caching")

        # Only a missing/unparsable id means "no bill"; anything else
        # (e.g. KeyboardInterrupt) must propagate untouched.
        try:
            bill_id = int(re.findall(r"BTN([0-9]+)", status_html)[0])
        except (IndexError, ValueError):
            raise Exception("No bill found. Hopefully that means it's the end of the session")

        title = status_page.find("td", {"colspan": "7"}).string
        self.log("Starting parse of %s" % current_bill)

        # create our bill!
        bill = Bill(session[1], chamber, current_bill, title.strip())

        # add sponsors and co-sponsors
        with self.soup_context(
            "http://alisondb.legislature.state.al.us/acas/ACTIONSponsorsResultsMac.asp?OID=%d" % bill_id
        ) as sponsors:
            # Climb three levels up from the "Co-Sponsors" match to reach
            # the element holding exactly two tables: primary sponsors
            # first, co-sponsors second.
            (primary, secondary) = (
                sponsors.findAll("table", text="Co-Sponsors")[0].parent.parent.parent.findAll("table")
            )
            for p in primary.findAll("td"):
                bill.add_sponsor("primary", p.string)
            for s in secondary.findAll("td"):
                bill.add_sponsor("cosponsor", s.string)

        with self.soup_context(
            "http://alisondb.legislature.state.al.us/acas/ACTIONHistoryResultsMac.asp?OID=%d" % bill_id
        ) as history:
            actions = history.findAll("table", text="Committee")[0].parent.parent.parent.findAll("tr")

            # Rows without a date reuse the most recent one seen; start
            # from None so a dateless first row cannot raise
            # UnboundLocalError.
            act_date = None

            # Column order: Date, Amend/Subst, Matter, Committee, Nay, Yea,
            # Abs, Vote
            for event in actions:
                e = event.findAll("td")
                if len(e) == 0:
                    continue

                date = e[0].string
                amend = e[1].find("input")
                matter = e[2].string
                y_votes = e[5].string
                n_votes = e[4].string
                a_votes = e[6].string

                if not matter:
                    continue

                roll = e[7].find("input")

                if date is not None:
                    act_date = dt.datetime.strptime(date, "%m/%d/%Y")

                if amend is not None:
                    # The amendment button embeds the document's directory
                    # and file name in its documentSelected(...) call.
                    splitter = re.findall(
                        r"documentSelected\(\'(\w*)\',\'([\w\d-]*)\',\'([\w\.\-]*)\',\'([\w\d/]*)\',\'([\w\d]*)\',\'([\w\s]*)\'",
                        str(amend),
                    )[0]
                    amend_url = "http://alisondb.legislature.state.al.us/acas/%s/%s" % (splitter[3], splitter[2])
                    bill.add_document(matter, amend_url)

                if roll is not None:
                    # The roll-call button embeds the query parameters in
                    # its voteSelected(...) call; the fourth captured group
                    # is not used.
                    splitter = re.findall(
                        r"voteSelected\(\'(\d*)\',\'(\d*)\',\'(\d*)\',\'(.*)\',\'(\d*)\'", str(roll)
                    )[0]
                    roll_url = (
                        "http://alisondb.legislature.state.al.us/acas/GetRollCallVoteResults.asp?MOID=%s&VOTE=%s&BODY=%s&SESS=%s"
                        % (splitter[0], splitter[1], splitter[2], splitter[4])
                    )
                    with self.soup_context(roll_url) as votes:
                        vote_rows = votes.findAll("table", text="Member")[0].parent.parent.parent.findAll("tr")

                        yea_votes = int(
                            votes.findAll("tr", text="Total Yea:")[0].parent.parent.findAll("td")[2].string
                        )
                        nay_votes = int(
                            votes.findAll("tr", text="Total Nay:")[0].parent.parent.findAll("td")[2].string
                        )
                        abs_votes = int(
                            votes.findAll("tr", text="Total Abs:")[0].parent.parent.findAll("td")[2].string
                        )
                        p_votes = len(votes.findAll("tr", text="P"))

                        # chamber, date, motion, passed, yes_count, no_count, other_count
                        vote = Vote(
                            chamber,
                            act_date,
                            matter,
                            (yea_votes > nay_votes),
                            yea_votes,
                            nay_votes,
                            abs_votes + p_votes,
                        )
                        vote.add_source(roll_url)

                        for row in vote_rows:
                            # The totals rows live in the same table as the
                            # member rows; skip them.
                            skip = str(row)
                            if "Total Yea" in skip or "Total Nay" in skip or "Total Abs" in skip:
                                continue

                            cells = row.findAll("td")
                            if len(cells) == 0:
                                continue

                            # A row can hold two members side by side:
                            # name/vote in cells 0/2 and, when present,
                            # again in cells 4/6.
                            (name, t) = cells[0].string, cells[2].string
                            self.dumb_vote(vote, name, t)

                            if len(cells) > 3:
                                (name, t) = cells[4].string, cells[6].string
                                self.dumb_vote(vote, name, t)

                        bill.add_vote(vote)

                # NOTE(review): a row that has both a roll-call button and
                # tallies records two votes (the one above and this one) --
                # confirm that is intended.
                if y_votes is not None:
                    yea_votes = self.dumber_vote(y_votes)
                    nay_votes = self.dumber_vote(n_votes)
                    abs_votes = self.dumber_vote(a_votes)
                    vote = Vote(chamber, act_date, matter, (yea_votes > nay_votes), yea_votes, nay_votes, abs_votes)
                    bill.add_vote(vote)

                bill.add_action(chamber, matter, act_date)

        self.save_bill(bill)