def scrape_votes(self, link, chamber, bill):
    """Scrape the roll-call votes listed on a bill's votes page and
    attach them to *bill*.

    link -- URL of the votes page (also recorded as the vote source)
    chamber -- chamber the votes took place in
    bill -- Bill object the scraped votes are added to
    """
    with self.lxml_context(link) as votes_page:
        page_tables = votes_page.cssselect("table")
        votes_table = page_tables[0]
        votes_elements = votes_table.cssselect("td")

        # Eliminate table headings and unnecessary element
        votes_elements = votes_elements[3:]

        # Cells come flattened; each vote row is a group of five.
        ve = self.grouper(5, votes_elements)
        for actor, date, name_and_text, name, text in ve:
            # Skip Committee of the Whole entries.
            if "cow" in text.text_content() or "COW" in text.text_content():
                continue

            vote_date = dt.datetime.strptime(date.text_content(), "%m/%d/%Y")

            motion_and_votes = text.text_content()
            # BUG FIX: str.lstrip() strips a *character set*, not a
            # prefix, so the original could eat leading characters of
            # the motion itself.  Strip the literal prefix instead.
            prefix = "FINAL VOTE - "
            if motion_and_votes.startswith(prefix):
                motion_and_votes = motion_and_votes[len(prefix):]

            motion, sep, votes = motion_and_votes.partition(".")
            passed = "passed" in votes

            votes_match = re.search("([0-9]+)-([0-9]+)-?([0-9]+)?", votes)
            # Normalize all tallies to ints (group() returns strings;
            # the original mixed str counts with an int 0 fallback).
            yes_count = int(votes_match.group(1))
            no_count = int(votes_match.group(2))
            other_count = votes_match.group(3)
            other_count = int(other_count) if other_count is not None else 0

            vote = Vote(chamber, vote_date, motion, passed,
                        yes_count, no_count, other_count)
            vote.add_source(link)
            bill.add_vote(vote)
def scrape_vote(self, bill, name, url):
    """Scrape a single chamber vote from a PDF and attach it to *bill*.

    name -- link text, e.g. "Senate Vote on ..., FINAL PASSAGE ..."
    url -- URL of the vote PDF
    """
    match = re.match(r'^(Senate|House) Vote on [^,]*,(.*)$', name)
    if not match:
        return

    chamber = {'Senate': 'upper', 'House': 'lower'}[match.group(1)]
    motion = match.group(2).strip()

    # Classify the motion; renamed to avoid shadowing builtin ``type``.
    if motion.startswith('FINAL PASSAGE'):
        motion_type = 'passage'
    elif motion.startswith('AMENDMENT'):
        motion_type = 'amendment'
    elif 'ON 3RD READINT' in motion:
        # NOTE(review): 'READINT' looks like a typo for 'READING' but
        # presumably mirrors a typo in the source data -- confirm
        # before "fixing" the pattern.
        motion_type = 'reading:3'
    else:
        motion_type = 'other'

    # Counts and date are filled in below from the parsed PDF.
    vote = Vote(chamber, None, motion, None, None, None, None)
    vote['type'] = motion_type
    vote.add_source(url)

    with self.urlopen(url) as text:
        (fd, temp_path) = tempfile.mkstemp()
        # BUG FIX: ensure the temp file is removed even when
        # pdf_to_lxml raises (the original leaked it on error).
        try:
            with os.fdopen(fd, 'wb') as w:
                w.write(text)
            html = pdf_to_lxml(temp_path)
        finally:
            os.remove(temp_path)

        vote_type = None
        total_re = re.compile(r'^Total--(\d+)$')
        body = html.xpath('string(/html/body)')

        for line in body.replace(u'\xa0', '\n').split('\n'):
            line = line.replace('&nbsp;', '').strip()
            if not line:
                continue

            if line in ('YEAS', 'NAYS', 'ABSENT'):
                vote_type = {'YEAS': 'yes', 'NAYS': 'no',
                             'ABSENT': 'other'}[line]
            elif vote_type:
                match = total_re.match(line)
                if match:
                    vote['%s_count' % vote_type] = int(match.group(1))
                elif vote_type == 'yes':
                    vote.yes(line)
                elif vote_type == 'no':
                    vote.no(line)
                elif vote_type == 'other':
                    vote.other(line)

        # The PDFs oddly don't say whether a vote passed or failed.
        # Hopefully passage just requires yes_votes > not_yes_votes
        if vote['yes_count'] > (vote['no_count'] + vote['other_count']):
            vote['passed'] = True
        else:
            vote['passed'] = False

        bill.add_vote(vote)
def parse_vote(self, bill, action, act_chamber, act_date, url):
    """Parse an Alaska vote page and return a Vote object.

    Tallies come from the action string itself (e.g. "Y25 N10 E3 A2");
    voter names come from the fetched page.
    """
    url = "http://www.legis.state.ak.us/basis/%s" % url
    info_page = self.soup_parser(self.urlopen(url))

    tally = re.findall(r'Y(\d+) N(\d+)\s*(?:\w(\d+))*\s*(?:\w(\d+))*'
                       r'\s*(?:\w(\d+))*', action)[0]
    # Unmatched optional groups come back as '' -- treat them as 0.
    # (The original converted to int here and then again redundantly.)
    yes, no, o1, o2, o3 = [0 if x == '' else int(x) for x in tally]
    other = o1 + o2 + o3

    votes = info_page.findAll('pre', text=re.compile('Yeas'),
                              limit=1)[0].split('\n\n')

    motion = info_page.findAll(text=re.compile('The question being'))[0]
    motion = re.findall(r'The question being:\s*"(.*)\?"',
                        motion, re.DOTALL)[0].replace('\n', ' ')

    vote = Vote(act_chamber, act_date, motion, yes > no, yes, no, other)

    # Map each section prefix to the Vote method that records it;
    # slice off exactly the prefix length.  BUG FIX: the original
    # sliced 'Absent: ' (8 chars) with [9:], dropping the first
    # letter of the first absent member's name.
    for vote_list in votes:
        vote_type = False
        for prefix, recorder in (('Yeas: ', vote.yes),
                                 ('Nays: ', vote.no),
                                 ('Excused: ', vote.other),
                                 ('Absent: ', vote.other)):
            if vote_list.startswith(prefix):
                vote_list, vote_type = vote_list[len(prefix):], recorder
                break
        if vote_type:
            for name in vote_list.split(','):
                vote_type(name.strip())

    vote.add_source(url)
    return vote
def parse_vote_new(self, bill, chamber, url):
    """Parse a new-style vote page and attach the vote to *bill*."""
    vote_page = BeautifulSoup(self.urlopen(url))
    table = vote_page.table

    # Second row of the table carries date, motion, tallies and result.
    info_row = table.findAll('tr')[1]
    info_cells = info_row.findAll('td')

    date = dt.datetime.strptime(info_row.td.contents[0], '%m/%d/%Y')
    motion = info_cells[1].contents[0]
    yes_count = int(info_cells[2].contents[0])
    no_count = int(info_cells[3].contents[0])
    abs_count = int(info_cells[4].contents[0])
    passed = info_cells[5].contents[0] == 'Pass'

    vote = Vote(chamber, date, motion, passed,
                yes_count, no_count, abs_count)
    vote.add_source(url)

    # Remaining rows (name, vote) pairs; skip anything malformed.
    for row in table.findAll('tr')[3:]:
        cells = row.findAll('td')
        if len(cells) != 2:
            continue

        name = row.td.contents[0].split(' of')[0]
        how = cells[1].contents[0]

        if how.startswith('Yea'):
            vote.yes(name)
        elif how.startswith('Nay'):
            vote.no(name)
        else:
            vote.other(name)

    bill.add_vote(vote)
def add_vote(self, bill, chamber, date, line, text):
    """Build a Vote from an action line's text, fetch the roll call it
    links to, and attach the vote to *bill*.
    """
    votes = re.findall(r'Ayes (\d+)\, Noes (\d+)', text)
    # NOTE(review): raises IndexError when the tally pattern is absent;
    # presumably callers only pass vote actions -- confirm.
    (yes, no) = int(votes[0][0]), int(votes[0][1])

    vtype = 'other'
    # Renamed loop variable: the original shadowed builtin ``type``.
    for regex, motion_type in motion_classifiers.iteritems():
        if re.match(regex, text):
            vtype = motion_type
            break

    v = Vote(chamber, date, text, yes > no, yes, no, 0, type=vtype)

    # fetch the vote itself
    link = line.xpath('//a[contains(@href, "/votes/")]')
    if link:
        link = link[0].get('href')
        v.add_source(link)

        filename, resp = self.urlretrieve(link)

        # 'av' files are Assembly (house) votes, 'sv' Senate votes.
        if 'av' in link:
            self.add_house_votes(v, filename)
        elif 'sv' in link:
            self.add_senate_votes(v, filename)

    bill.add_vote(v)
def scrape_vote(self, bill, date, url):
    """Scrape a vote detail page and attach the vote to *bill*."""
    with self.urlopen(url) as page:
        page = lxml.html.fromstring(page)

        header = page.xpath("string(//h4[contains(@id, 'hdVote')])")

        location = header.split(', ')[1]
        if location.startswith('House'):
            chamber = 'lower'
        elif location.startswith('Senate'):
            chamber = 'upper'
        else:
            # BUG FIX: the original interpolated ``chamber`` here, which
            # is unbound on this branch and raised NameError instead of
            # the intended ScrapeError.
            raise ScrapeError("Bad chamber: %s" % location)

        committee = ' '.join(location.split(' ')[1:]).strip()
        if not committee or committee.startswith('of Representatives'):
            committee = None

        motion = ', '.join(header.split(', ')[2:]).strip()

        yes_count = int(
            page.xpath("string(//td[contains(@id, 'tdAyes')])"))
        no_count = int(
            page.xpath("string(//td[contains(@id, 'tdNays')])"))
        excused_count = int(
            page.xpath("string(//td[contains(@id, 'tdExcused')])"))
        absent_count = int(
            page.xpath("string(//td[contains(@id, 'tdAbsent')])"))
        other_count = excused_count + absent_count

        passed = yes_count > no_count

        # Classify the motion; renamed to avoid shadowing builtin ``type``.
        if motion.startswith('Do Pass'):
            vote_type = 'passage'
        elif motion == 'Concurred in amendments':
            vote_type = 'amendment'
        elif motion == 'Veto override':
            vote_type = 'veto_override'
        else:
            vote_type = 'other'

        vote = Vote(chamber, date, motion, passed, yes_count, no_count,
                    other_count)
        vote['type'] = vote_type
        if committee:
            vote['committee'] = committee
        vote.add_source(url)

        # Each voter's name is the cell immediately before the Yea/Nay cell.
        for td in page.xpath("//table[contains(@id, 'tblVotes')]/tr/td"):
            if td.text == 'Yea':
                vote.yes(td.getprevious().text.strip())
            elif td.text == 'Nay':
                vote.no(td.getprevious().text.strip())
            elif td.text in ('Excused', 'Absent'):
                vote.other(td.getprevious().text.strip())

        bill.add_vote(vote)
def scrape_votes(self, link, chamber, bill):
    """Scrape the roll-call votes listed on a bill's votes page and
    attach them to *bill*.
    """
    with self.urlopen(link) as votes_page_html:
        votes_page = lxml.html.fromstring(votes_page_html)
        page_tables = votes_page.cssselect('table')
        votes_table = page_tables[0]
        votes_elements = votes_table.cssselect('td')

        # Eliminate table headings and unnecessary element
        votes_elements = votes_elements[3:]

        # Cells come flattened; each vote row is a group of five.
        ve = grouper(5, votes_elements)
        for actor, date, name_and_text, name, text in ve:
            # Skip Committee of the Whole entries.
            if 'cow' in text.text_content() or 'COW' in text.text_content():
                continue

            vote_date = dt.datetime.strptime(date.text_content(),
                                             '%m/%d/%Y')

            motion_and_votes = text.text_content()
            # BUG FIX: str.lstrip() strips a *character set*, not a
            # prefix, so the original could eat leading characters of
            # the motion itself.  Strip the literal prefix instead.
            prefix = 'FINAL VOTE - '
            if motion_and_votes.startswith(prefix):
                motion_and_votes = motion_and_votes[len(prefix):]

            motion, sep, votes = motion_and_votes.partition('.')
            passed = 'passed' in votes

            votes_match = re.search('([0-9]+)-([0-9]+)-?([0-9]+)?', votes)
            # Normalize all tallies to ints (group() returns strings;
            # the original mixed str counts with an int 0 fallback).
            yes_count = int(votes_match.group(1))
            no_count = int(votes_match.group(2))
            other_count = votes_match.group(3)
            other_count = int(other_count) if other_count is not None else 0

            vote = Vote(chamber, vote_date, motion, passed,
                        yes_count, no_count, other_count)
            vote.add_source(link)
            bill.add_vote(vote)
def scrape_vote(self, bill, name, url):
    """Scrape a single chamber vote from a PDF and attach it to *bill*.

    name -- link text, e.g. "Senate Vote on ..., FINAL PASSAGE ..."
    url -- URL of the vote PDF
    """
    match = re.match(r"^(Senate|House) Vote on [^,]*,(.*)$", name)
    if not match:
        return

    chamber = {"Senate": "upper", "House": "lower"}[match.group(1)]
    motion = match.group(2).strip()

    # Classify the motion; renamed to avoid shadowing builtin ``type``.
    if motion.startswith("FINAL PASSAGE"):
        motion_type = "passage"
    elif motion.startswith("AMENDMENT"):
        motion_type = "amendment"
    elif "ON 3RD READINT" in motion:
        # NOTE(review): 'READINT' looks like a typo for 'READING' but
        # presumably mirrors a typo in the source data -- confirm.
        motion_type = "reading:3"
    else:
        motion_type = "other"

    # Counts and date are filled in below from the parsed PDF.
    vote = Vote(chamber, None, motion, None, None, None, None)
    vote["type"] = motion_type
    vote.add_source(url)

    with self.urlopen(url) as text:
        (fd, temp_path) = tempfile.mkstemp()
        # BUG FIX: ensure the temp file is removed even when
        # pdf_to_lxml raises (the original leaked it on error).
        try:
            with os.fdopen(fd, "wb") as w:
                w.write(text)
            html = pdf_to_lxml(temp_path)
        finally:
            os.remove(temp_path)

        vote_type = None
        total_re = re.compile(r"^Total--(\d+)$")
        body = html.xpath("string(/html/body)")

        for line in body.replace(u"\xa0", "\n").split("\n"):
            line = line.replace("&nbsp;", "").strip()
            if not line:
                continue

            if line in ("YEAS", "NAYS", "ABSENT"):
                vote_type = {"YEAS": "yes", "NAYS": "no",
                             "ABSENT": "other"}[line]
            elif vote_type:
                match = total_re.match(line)
                if match:
                    vote["%s_count" % vote_type] = int(match.group(1))
                elif vote_type == "yes":
                    vote.yes(line)
                elif vote_type == "no":
                    vote.no(line)
                elif vote_type == "other":
                    vote.other(line)

        # The PDFs oddly don't say whether a vote passed or failed.
        # Hopefully passage just requires yes_votes > not_yes_votes
        if vote["yes_count"] > (vote["no_count"] + vote["other_count"]):
            vote["passed"] = True
        else:
            vote["passed"] = False

        bill.add_vote(vote)
def scrape_old_vote(self, url):
    """Scrape an old-style vote page and return a Vote object."""
    page = self.soup_parser(self.urlopen(url))

    header = page.h3.contents[0]

    chamber_name = header.split(', ')[1]
    chamber = 'lower' if chamber_name.startswith('House') else 'upper'

    location = ' '.join(chamber_name.split(' ')[1:])
    if location.startswith('of Representatives'):
        location = ''

    motion = ', '.join(header.split(', ')[2:])

    def cell_count(cell):
        # An empty cell means a zero tally.
        return int(cell.contents[0]) if len(cell.contents) else 0

    # Second table on the page holds the tallies, in alternating
    # label/value cells.
    tally_cells = page.findAll('table')[1].findAll('td')
    yes_count = cell_count(tally_cells[1])
    no_count = cell_count(tally_cells[3])
    excused_count = cell_count(tally_cells[5])
    absent_count = cell_count(tally_cells[7])
    other_count = excused_count + absent_count

    vote = Vote(chamber, None, motion, yes_count > no_count,
                yes_count, no_count, other_count,
                excused_count=excused_count,
                absent_count=absent_count,
                location=location)
    vote.add_source(url)

    # Voter names sit in the cell immediately preceding each marker.
    for td in page.table.findAll('td'):
        marker = td.contents[0]
        if marker == 'Yea':
            vote.yes(td.findPrevious().contents[0])
        elif marker == 'Nay':
            vote.no(td.findPrevious().contents[0])
        elif marker in ('Excused', 'Absent'):
            vote.other(td.findPrevious().contents[0])

    return vote
def scrape_votes(self, bill, sponsor, link):
    """Scrape every vote in the lblVoteData span and attach them to
    *bill*; returns the bill.
    """
    with self.urlopen(link) as page:
        page = lxml.html.fromstring(page)
        raw_vote_data = page.xpath(
            "//span[@id='lblVoteData']")[0].text_content()
        raw_vote_data = raw_vote_data.strip().split(
            '%s by %s - ' % (bill['bill_id'], sponsor))[1:]

        for raw_vote in raw_vote_data:
            # Fields are separated by runs of ten non-breaking spaces.
            raw_vote = raw_vote.split(u'\xa0' * 10)
            motion = raw_vote[0]

            vote_date = re.search(r'(\d+/\d+/\d+)', motion)
            if vote_date:
                vote_date = datetime.datetime.strptime(
                    vote_date.group(), '%m/%d/%Y')

            passed = ('Passed' in motion) or ('Adopted' in raw_vote[1])

            vote_regex = re.compile(r'\d+$')
            aye_regex = re.compile(r'^.+voting aye were: (.+) -')
            no_regex = re.compile(r'^.+voting no were: (.+) -')
            yes_count = None
            no_count = None
            other_count = 0
            ayes = []
            nos = []

            for v in raw_vote[1:]:
                if v.startswith('Ayes...') and vote_regex.search(v):
                    yes_count = int(vote_regex.search(v).group())
                elif v.startswith('Noes...') and vote_regex.search(v):
                    no_count = int(vote_regex.search(v).group())
                elif aye_regex.search(v):
                    ayes = aye_regex.search(v).groups()[0].split(', ')
                elif no_regex.search(v):
                    nos = no_regex.search(v).groups()[0].split(', ')

            # BUG FIX: the original tested truthiness, so a legitimate
            # zero tally (e.g. "Noes...0") threw away both counts.
            if yes_count is not None and no_count is not None:
                passed = yes_count > no_count
            else:
                yes_count = no_count = 0

            vote = Vote(bill['chamber'], vote_date, motion, passed,
                        yes_count, no_count, other_count)
            vote.add_source(link)

            for a in ayes:
                vote.yes(a)
            for n in nos:
                vote.no(n)

            bill.add_vote(vote)

    return bill
def scrape_lower_vote(self, url):
    """Scrape a House floor vote page and return a Vote object."""
    with self.urlopen(url) as page:
        page = lxml.html.fromstring(page)

        table = page.xpath("/html/body/table/tr[3]/td/table/tr/"
                           "td[3]/table/tr/td/table[3]")[0]

        # The motion is pieced together from three labelled fields.
        pieces = []
        for label in ("Amendment Number", "Reading Number",
                      "Floor Actions"):
            pieces.append(page.xpath(
                "string(//*[contains(text(), '%s')])" % label).strip())
        motion = " ".join(pieces).strip()

        date = page.xpath(
            'string(//*[contains(text(), "Date:")]/following-sibling::*)')
        date = datetime.datetime.strptime(date, "%m/%d/%Y")

        def tally(label):
            # Tallies render as e.g. "Yeas - 98".
            raw = page.xpath(
                'string(//*[contains(text(), "%s")])' % label)
            return int(raw.split(' - ')[1])

        yeas = tally("Yeas")
        nays = tally("Nays")
        nv = tally("Not Voting")

        vote = Vote('lower', date, motion, yeas > (nays + nv),
                    yeas, nays, nv)
        vote.add_source(url)

        # Each row starts with a one-character marker, then the name.
        for tr in table.xpath("tr/td/table/tr"):
            text = re.sub(r"\s+", r" ", tr.xpath("string()"))
            name = " ".join(text.split()[1:])
            marker = text[0]
            if marker == "Y":
                vote.yes(name)
            elif marker == "N":
                vote.no(name)
            elif marker in ("-", "C"):
                vote.other(name)

        return vote
def scrape_new_vote(self, url):
    """Scrape a new-style (ASP.NET) vote page and return a Vote."""
    page = self.soup_parser(self.urlopen(url))

    header = page.find(id="ctl00_contentMain_hdVote").contents[0]

    chamber_name = header.split(', ')[1]
    chamber = 'lower' if chamber_name.startswith('House') else 'upper'

    location = ' '.join(chamber_name.split(' ')[1:])
    if location.startswith('of Representatives'):
        location = ''

    motion = ', '.join(header.split(', ')[2:])

    def tally(element_id):
        # Each tally lives in its own labelled <td>.
        return int(page.find(id=element_id).contents[0])

    yes_count = tally("ctl00_contentMain_tdAyes")
    no_count = tally("ctl00_contentMain_tdNays")
    excused_count = tally("ctl00_contentMain_tdExcused")
    absent_count = tally("ctl00_contentMain_tdAbsent")
    other_count = excused_count + absent_count

    vote = Vote(chamber, None, motion, yes_count > no_count,
                yes_count, no_count, other_count,
                excused_count=excused_count,
                absent_count=absent_count,
                location=location)
    vote.add_source(url)

    # Voter names sit in the cell immediately preceding each marker.
    for td in page.find(id="ctl00_contentMain_tblVotes").findAll('td'):
        marker = td.contents[0]
        if marker == 'Yea':
            vote.yes(td.findPrevious().contents[0])
        elif marker == 'Nay':
            vote.no(td.findPrevious().contents[0])
        elif marker in ('Excused', 'Absent'):
            vote.other(td.findPrevious().contents[0])

    return vote
def parse_vote(self, bill, actor, date, motion, url):
    """Parse a plain-text roll call at *url* and attach it to *bill*."""
    with self.urlopen(url) as page:
        vote_re = re.compile('YEAS -?\s?(\d+)(.*)NAYS -?\s?(\d+)'
                             '(.*)ABSENT( OR NOT VOTING)? -?\s?'
                             '(\d+)(.*)',
                             re.MULTILINE | re.DOTALL)
        match = vote_re.search(page)

        yes_count = int(match.group(1))
        no_count = int(match.group(3))
        other_count = int(match.group(6))
        passed = yes_count > no_count

        # Floor votes carry the chamber as actor; anything else is a
        # committee/office and becomes the vote's location instead.
        if actor in ('upper', 'lower'):
            vote_chamber, vote_location = actor, ''
        else:
            vote_chamber, vote_location = '', actor

        vote = Vote(vote_chamber, date, motion, passed,
                    yes_count, no_count, other_count,
                    location=vote_location)
        vote.add_source(url)

        # Names within each section are separated by 2+ spaces;
        # skip the empty strings an empty section produces.
        for voter in re.split('\s{2,}', match.group(2).strip()):
            if voter:
                vote.yes(voter)
        for voter in re.split('\s{2,}', match.group(4).strip()):
            if voter:
                vote.no(voter)
        for voter in re.split('\s{2,}', match.group(7).strip()):
            if voter:
                vote.other(voter)

        bill.add_vote(vote)
def scrape_vote(self, bill, chamber, url):
    """Scrape a roll-call table and attach the vote to *bill*."""
    with self.urlopen(url) as page:
        # &nbsp; entities confuse text extraction; flatten them first.
        page = page.replace('&nbsp;', ' ')
        page = lxml.html.fromstring(page)

        info_row = page.xpath("//table[1]/tr[2]")[0]

        date = info_row.xpath("string(td[1])")
        date = datetime.datetime.strptime(date, "%m/%d/%Y")

        motion = info_row.xpath("string(td[2])")
        yes_count = int(info_row.xpath("string(td[3])"))
        no_count = int(info_row.xpath("string(td[4])"))
        other_count = int(info_row.xpath("string(td[5])"))
        passed = info_row.xpath("string(td[6])") == 'Pass'

        # Classify the motion (renamed from ``type``, which shadowed
        # the builtin and was then clobbered in the loop below).
        if motion == 'Shall the bill pass?':
            vote_type = 'passage'
        elif motion == 'Shall the bill be read the third time?':
            vote_type = 'reading:3'
        elif 'be amended as' in motion:
            vote_type = 'amendment'
        else:
            vote_type = 'other'

        vote = Vote(chamber, date, motion, passed, yes_count,
                    no_count, other_count)
        # BUG FIX: the classification above was computed but never
        # attached to the vote in the original (dead code).
        vote['type'] = vote_type
        vote.add_source(url)

        for tr in page.xpath("//table[1]/tr")[3:]:
            if len(tr.xpath("td")) != 2:
                continue

            name = tr.xpath("string(td[1])").split(' of')[0]
            how = tr.xpath("string(td[2])").strip()

            if how == 'Yea':
                vote.yes(name)
            elif how == 'Nay':
                vote.no(name)
            else:
                vote.other(name)

        bill.add_vote(vote)
def scrape_votes(self, vote_text, vote_url, house, date, bill):
    """Parse a textual vote tally and attach the vote to *bill*.

    vote_text groups voters into semicolon-separated parts in the
    order: Ayes; Ayes with reservations; Noes; Excused.
    """
    votes_parts = vote_text.split(";")
    voters = []
    motion_text, sep, after = vote_text.partition(
        " The votes were as follows:")

    for vp in votes_parts:
        before, sep, after = vp.partition("(s)")
        voters_list = after.split(", ")
        voters_list[0] = voters_list[0].lstrip(" ")
        voters_list[-1] = voters_list[-1].rstrip(". ")
        voters.append(voters_list)

    # Ayes, Ayes with reservations, Noes, Excused
    vote_counts = [0, 0, 0, 0]

    for i, t in enumerate(votes_parts):
        match = re.search("[0-9]+", t)
        if match is not None:
            vote_counts[i] = int(match.group(0))

    vote_house = "lower" if house == 'H' else "upper"

    vote = Vote(vote_house, date, motion_text, True,
                vote_counts[0], vote_counts[2],
                vote_counts[1] + vote_counts[3])
    vote.add_source(vote_url)

    for yes_voter in voters[0]:
        vote.yes(yes_voter)
    for no_voter in voters[2]:
        vote.no(no_voter)
    for other_voter in voters[1]:
        vote.other(other_voter)
    # BUG FIX: the original iterated voters[2] (the Noes) a second time
    # here, recording every No voter as "other" too; the Excused group
    # is voters[3], matching the vote_counts arithmetic above.
    for other_voter in voters[3]:
        vote.other(other_voter)

    bill.add_vote(vote)
def scrape_vote(self, chamber, session, bill_id, vote_url):
    """Scrape a Minnesota House roll call page and save the vote."""
    NO_VOTE_URL = 'http://www.house.leg.state.mn.us/votes/novotefound.asp'
    with self.urlopen(vote_url) as html:
        # sometimes the link is broken, will redirect to NO_VOTE_URL
        if html.response.url == NO_VOTE_URL:
            return

        doc = lxml.html.fromstring(html)
        paragraphs = doc.xpath('//h1/following-sibling::p')

        # first paragraph has motion and vote total
        lines = paragraphs[0].text_content().splitlines()
        # 3rd line is the motion except in cases where first line is gone
        motion = lines[2] or lines[1]
        # last line is "__ YEA and __ Nay"
        yeas, nays = self.yeanay_re.match(lines[-1]).groups()
        yeas, nays = int(yeas), int(nays)

        # second paragraph has date
        date = self.date_re.match(paragraphs[1].text_content()).groups()[0]
        date = datetime.datetime.strptime(date, '%m/%d/%Y')

        filename = 'vote%s-%s' % (self.sequence.next(), bill_id)

        vote = Vote('lower', date, motion, yeas > nays, yeas, nays, 0,
                    session=session, bill_id=bill_id,
                    bill_chamber=chamber, filename=filename)
        vote.add_source(vote_url)

        # first table has YEAs
        for name in doc.xpath('//table[1]/tr/td/font/text()'):
            vote.yes(name.strip())

        # second table is nays
        for name in doc.xpath('//table[2]/tr/td/font/text()'):
            vote.no(name.strip())

        self.save_vote(vote)
def scrape_upper_vote(self, url):
    """Scrape a Senate vote rendered as preformatted text."""
    with self.urlopen(url) as page:
        page = lxml.html.fromstring(page)
        text = page.xpath('string(//pre)')

        def field(pattern):
            return re.search(pattern, text, re.MULTILINE)

        # The motion is pieced together from three labelled fields.
        motion = " ".join([
            field(r'Amendment Number:\s([^\s]+)?').group(0).strip(),
            field(r'Reading Number .:\s([^\s]+)?').group(0).strip(),
            field(r'Floor Actions ..:\s([^\s]+)?').group(0).strip(),
        ])

        yeas = int(field(r'Yeas\s-\s(\d+)').group(1))
        nays = int(field(r'Nays\s-\s(\d+)').group(1))
        nv = int(field(r'Not\sVoting\s-\s(\d+)').group(1))

        date = field(r'Date:\s(\d+/\d+/\d+)').group(1)
        date = datetime.datetime.strptime(date, '%m/%d/%Y')

        vote = Vote('upper', date, motion, yeas > (nays + nv),
                    yeas, nays, nv)
        vote.add_source(url)

        # Individual entries look like "_ Y  Name" / "_ N  Name" /
        # "_ EX Name".
        pattern = r'_\s%s\s(\w+)'
        for match in re.finditer(pattern % 'Y ', text, re.MULTILINE):
            vote.yes(match.group(1))
        for match in re.finditer(pattern % 'N ', text, re.MULTILINE):
            vote.no(match.group(1))
        for match in re.finditer(pattern % 'EX', text, re.MULTILINE):
            vote.other(match.group(1))

        return vote
def scrape_vote(self, bill, vote_type_id, vote_type):
    """Scrape a DC Council voting page and attach the vote to *bill*."""
    base_url = ('http://www.dccouncil.washington.dc.us/lims/voting.aspx'
                '?VoteTypeID=%s&LegID=%s')
    url = base_url % (vote_type_id, bill['bill_id'])
    with self.urlopen(url) as html:
        doc = lxml.html.fromstring(html)

        vote_date = convert_date(doc.get_element_by_id('VoteDate').text)

        # check if voice vote / approved boxes have an 'x'
        def box_checked(span_id):
            return doc.xpath(
                '//span[@id="%s"]/b/text()' % span_id)[0] == 'x'

        voice = box_checked('VoteTypeVoice')
        passed = box_checked('VoteResultApproved')

        yes_count = extract_int(doc.xpath(
            '//span[@id="VoteCount1"]/b/text()')[0])
        no_count = extract_int(doc.xpath(
            '//span[@id="VoteCount2"]/b/text()')[0])
        other_count = 13 - (yes_count + no_count)  # a bit lazy

        vote = Vote('upper', vote_date, vote_type, passed, yes_count,
                    no_count, other_count, voice_vote=voice)
        vote.add_source(url)

        # members are only text on page in a <u> tag
        for member_u in doc.xpath('//u'):
            member = member_u.text
            vote_text = member_u.xpath('../../i/text()')[0]
            if 'YES' in vote_text:
                vote.yes(member)
            elif 'NO' in vote_text:
                vote.no(member)
            else:
                vote.other(member)

        bill.add_vote(vote)
def scrape_votes(self, vote_page, bill, url):
    """Scrape a roll-call page and attach the vote to *bill*."""
    date_match = re.search(r"[0-9]{1,2}/[0-9]{1,2}/[0-9]{4}",
                           vote_page.text_content())
    vote_date = dt.datetime.strptime(date_match.group(0), '%m/%d/%Y')

    votes = {"Yeas": 0, "Nays": 0, "Absent": 0, "Excused": 0}
    for key in votes:
        match = re.search(key + ": [0-9]+", vote_page.text_content())
        # BUG FIX: the original assigned the tally to the loop variable
        # (`number = match[1]`), so the dict was never updated -- every
        # count stayed 0 and `passed` was always False.
        votes[key] = int(match.group(0).split(" ")[1])

    passed = votes["Yeas"] > votes["Nays"]

    chamber_match = re.search("(Senate|House) vote",
                              vote_page.text_content())
    chamber_word = chamber_match.group(0).split(" ")[0]
    if chamber_word == "Senate":
        chamber = "upper"
        title = "Senator"
    else:
        chamber = "lower"
        title = "Representative"

    motion = vote_page.cssselect('td[align="center"]')[2].text_content()

    vote = Vote(chamber, vote_date, motion, passed, votes["Yeas"],
                votes["Nays"], votes["Absent"] + votes["Excused"])
    vote.add_source(url)

    # Sections appear in order: Yeas, Nays, Absent, Excused.
    vote_elements = vote_page.cssselect('span[class="RollCall"]')
    vote_types = []
    for ve in vote_elements:
        voters = ve.text_content().split(", ")
        if len(voters) == 1:
            voters = voters[0].split(" and ")
        before, itself, after = voters[0].partition(title)
        voters[0] = after.lstrip("s ")
        voters[-1] = voters[-1].lstrip("and ")
        vote_types.append(voters)

    for v in vote_types[0]:
        vote.yes(v)
    for v in vote_types[1]:
        vote.no(v)
    for v in vote_types[2]:
        vote.other(v)
    for v in vote_types[3]:
        vote.other(v)

    bill.add_vote(vote)
def scrape_votes(self, bill, file_type, number, session):
    """Scrape Ohio roll-call votes for a bill and attach them to *bill*.

    Builds the votes.cfm URL from session/file_type/number, then walks
    the vote-table rows, extracting tallies and voter names from the
    free-form info text of each row.
    """
    vote_url = 'http://www.legislature.state.oh.us/votes.cfm?ID=' + session + '_' + file_type + '_' + str(number)

    with self.urlopen(vote_url) as page:
        root = lxml.etree.fromstring(page, lxml.etree.HTMLParser())

        save_date = None
        for el in root.xpath('/html/body/table/tr[3]/td/table/tr[1]/td[2][@class="bigPanel"]/blockquote/font/table'):
            for mr in root.xpath('/html/body/table/tr[3]/td/table/tr[1]/td[2][@class="bigPanel"]/blockquote/font/table/tr[position() > 1]'):
                yes_count = 0
                yes_placement = 0
                no_count = 0
                no_placement = 0

                date = mr.xpath('string(td/font/a)')
                date = date.lstrip()
                date = date.rstrip()

                info = mr.xpath('string(td[2]/font)')

                # makes sure that date is saved
                # (rows without their own date reuse the last seen one)
                if len(date.split()) > 0:
                    date = datetime.strptime(date, "%m/%d/%Y")
                    save_date = date

                # figures out the number of votes for each way
                # also figures out placement of yes and no voters starts
                # for later iteration
                if info.split()[0] == 'Yeas':
                    # yes votes
                    # NOTE(review): the tallies are kept as *strings*
                    # here; the `yes_count > no_count` and `> 0`
                    # comparisons below rely on Python 2 mixed-type
                    # ordering -- confirm before porting.
                    yes_count = info.split()[2]

                    # no votes
                    for voter in range(3, len(info.split())):
                        if info.split()[voter] == '-':
                            no_count = info.split()[voter + 1]
                            no_placement = voter + 2
                            yes_placement = voter - 2

                    # motion and chamber
                    if info.split()[-1] == 'details':
                        motion = info[0:len(info)-10]
                        motion = motion.lstrip()
                        motion = motion.rstrip()
                        chamber = motion.split()[0]
                        if chamber == "Senate":
                            chamber = "upper"
                        else:
                            chamber = "lower"

                    # pass or not (only by which has more.
                    # need to see look up how they are passed)
                    if yes_count > no_count:
                        passed = True
                    else:
                        passed = False

                    vote = Vote(chamber, save_date, motion, passed,
                                int(yes_count), int(no_count),
                                other_count = 0)

                    # adding in yea voters
                    for voters in range(3, yes_placement):
                        legis = ""
                        initials = 0
                        # checks to see if the next name is actually an initial
                        if len(info.split()[voters+1]) < 2:
                            legis = legis + info.split()[voters] + " " + info.split()[voters + 1]
                        elif len(info.split()[voters]) < 2:
                            initials = 1
                        else:
                            legis = legis + info.split()[voters]
                        if initials < 1:
                            vote.yes(legis)

                    # adding in no voters
                    for voters in range(no_placement, len(info.split())):
                        legis = ""
                        initials = 0
                        # checks to see if the next name is actually an initial
                        if (info.split()[voters] != info.split()[-1]) and (len(info.split()[voters+1]) < 2):
                            legis = legis + info.split()[voters] + " " + info.split()[voters + 1]
                        elif len(info.split()[voters]) < 2:
                            # NOTE(review): ``initals`` is a typo for
                            # ``initials`` -- the guard below never sees
                            # this flag, so an empty name is still
                            # recorded as a no vote.  Left unchanged.
                            initals = 1
                        else:
                            legis = legis + info.split()[voters]
                        if initials < 1:
                            vote.no(legis)

                    # gets rid of blank votes
                    if yes_count > 0 or no_count > 0:
                        vote.add_source(vote_url)
                        bill.add_vote(vote)
def scrape_bill(self, chamber, session, bill_id):
    """Scrape an Arizona bill: the documents page (versions, fact
    sheets, agendas, calendars, amendments, videos) and the actions
    page (sponsors, committee actions, readings, floor votes, governor
    actions), then save the assembled Bill.
    """
    session_id = self.get_session_id(session)
    url = base_url + 'DocumentsForBill.asp?Bill_Number=%s&Session_ID=%s' % (
        bill_id, session_id)
    with self.urlopen(url) as docs_for_bill:
        root = html.fromstring(docs_for_bill)
        bill_title = root.xpath(
            '//div[@class="ContentPageTitle"]')[1].text.strip()
        # Depending on the progress the bill has made through the house
        # some table might not exist, the links that have javascript:Show****
        # have a table with related documents/calanders/agendas/versions
        # I am skipping the sponsors link because that information is on the
        # bill overview page where all of the actions are found.
        doc_section_links = root.xpath(
            '//a[contains(@href, "javascript:Show")]')
        bill = Bill(session, chamber, bill_id, bill_title)
        bill.type = self.get_bill_type(bill_id[:-4])
        bill.add_source(url)
        for link in doc_section_links:
            link_id = utils.parse_link_id(link)
            link_text = link.text_content().strip()
            # Each Show* link toggles a div holding a table of rows.
            div_path = '//div[@id="%s"]/table//tr' % link_id
            if link_text == 'Show Versions':
                # the first row has only a comment
                for tr in root.xpath(div_path)[1:]:
                    tds = tr.cssselect('td')  # list(tr.iterchildren('td'))
                    if len(tds) >= 4:
                        bill_version = tds[1].text_content().strip()
                        bill_html = tds[2].xpath('string(font/a/@href)')
                        bill_pdf = tds[3].xpath('string(font/a/@href)')
                        bill.add_version(bill_version, bill_html,
                                         pdf_url=bill_pdf)
            elif link_text == 'Show Summaries/Fact Sheets':
                for tr in root.xpath(div_path)[1:]:
                    # the first row has only a comment
                    tds = tr.cssselect('td')
                    if len(tds) > 1:
                        fact_sheet = tds[1].text_content().strip()
                        fact_sheet_url = tds[1].xpath(
                            'string(font/a/@href)')
                        bill.add_document(fact_sheet, fact_sheet_url,
                                          type="fact sheet")
            elif link_text in ('Show Senate Agendas', 'Show House Agendas'):
                agenda_type = 'House Agenda' if re.match('House', link_text) else 'Senate Agenda'
                for tr in root.xpath(div_path)[2:]:
                    # the first row has only a comment
                    # the second row is the table header
                    tds = tr.cssselect('td')
                    if len(tds) >= 8:
                        agenda_committee = tds[0].text_content().strip()
                        agenda_revised = tds[1].text.strip()
                        agenda_cancelled = tds[2].text.strip()
                        agenda_date = tds[3].text_content().strip()
                        agenda_time = tds[4].text_content().strip()
                        agenda_room = tds[5].text_content().strip()
                        agenda_pdf = tds[6].xpath('string(a/@href)').strip()
                        agenda_html = tds[7].xpath('string(a/@href)').strip()
                        bill.add_document(agenda_committee, agenda_html,
                                          type=agenda_type)
            elif link_text in ('Show Senate Calendars', 'Show House Calendar'):
                cal_type = 'house calendar' if re.match('House', link_text) else 'senate calendar'
                for tr in root.xpath(div_path)[2:]:
                    # the first row has only a comment
                    # the second row is the table header
                    tds = tr.cssselect('td')
                    if len(tds) >= 6:
                        calendar_name = tds[0].text_content().strip()
                        calendar_number = tds[1].text_content().strip()
                        calendar_modified = True if tds[2].xpath('img') else False
                        calendar_date = tds[3].text_content().strip()
                        calendar_html = tds[5].xpath('string(a/@href)')
                        bill.add_document(calendar_name, calendar_html,
                                          type="calendar")
            elif link_text == 'Show Adopted Amendments':
                for tr in root.xpath(div_path)[1:]:
                    tds = tr.cssselect('td')
                    amendment_title = tds[1].text_content().strip()
                    amendment_link = tds[2].xpath('string(font/a/@href)')
                    bill.add_document(amendment_title, amendment_link,
                                      type='amendment')
            elif link_text == 'Show Proposed Amendments':
                for tr in root.xpath(div_path)[1:]:
                    tds = tr.cssselect('td')
                    if len(tds) >= 3:
                        amendment_title = tds[1].text_content().strip()
                        amendment_link = tds[2].xpath(
                            'string(font/a/@href)')
                        bill.add_document(amendment_title, amendment_link,
                                          type='amendment')
            elif link_text == 'Show Bill Videos':
                for tr in root.xpath(div_path)[2:]:
                    tds = tr.cssselect('td')
                    if len(tds) >= 3:
                        video_title = tds[1].text_content().strip()
                        video_link = tds[2].xpath('string(a/@href)')
                        video_date = tds[0].text_content().strip()
                        bill.add_document(video_title, video_link,
                                          date=video_date, type='video')

    # action_url =
    # 'http://www.azleg.gov/FormatDocument.asp?inDoc=/legtext/49leg/2r/bills/hb2001o.asp'
    # again the actions page may or may not have a given table and the order
    # of the actions depends on the chamber the bill originated in.
    ses_num = utils.legislature_to_number(session)
    action_url = base_url + 'FormatDocument.asp?inDoc=/legtext/%s/bills/%so.asp' % (ses_num, bill_id.lower())
    with self.urlopen(action_url) as action_page:
        bill.add_source(action_url)
        root = html.fromstring(action_page)
        action_tables = root.xpath('/html/body/div/table/tr[3]/td[4]/table/tr/td/table/tr/td/table')
        for table in action_tables:
            rows = table.cssselect('tr')
            house = False if chamber == 'upper' else True
            # The table's header cell names the action; drop trailing ':'.
            action = table.cssselect('td')[0].text_content().strip()[:-1]
            if action == 'SPONSORS':
                if len(rows[0]) == 4:
                    for row in rows:
                        tds = row.cssselect('td')
                        sponsors = [tds[i:i+2:] for i in range(1, len(tds), 2)]
                        bill.add_sponsor(sponsors[0][1].text_content().strip(),
                                         sponsors[0][0].text_content().strip(),
                                         sponsor_link=sponsors[0][0].xpath('string(a/@href)'))
            elif action == 'COMMITTEES':
                # the html for this table has meta tags that give the chamber
                # and the committee abreviation
                # <meta name="HCOMMITTEE" content="RULES">
                # question for actions: in the case of committees would House
                # Rules be better for an actor?
                for row in rows[1:]:
                    tds = row.cssselect('td')
                    meta_tag = row.cssselect('meta')[0]
                    actor = "%s:%s" % (meta_tag.get('name'),
                                       meta_tag.get('content'))
                    committee = meta_tag.get('content')
                    # NOTE(review): 'reffered' is misspelled in the
                    # action text; the type= keyword below is correct.
                    act = 'committee:reffered'
                    date = datetime.datetime.strptime(tds[1].text_content().strip(),
                                                      '%m/%d/%y')
                    bill.add_action(actor, act, date,
                                    type='committee:referred')
                    if len(tds) == 5:
                        if re.match('\d{2}/\d{2}/\d{2}', tds[3].text_content().strip()):
                            date = datetime.datetime.strptime(tds[3].text_content().strip(),
                                                              '%m/%d/%y')
                        else:
                            date = datetime.datetime.strptime(tds[1].text_content().strip(),
                                                              '%m/%d/%y')
                        act = tds[4].text_content().strip()
                        status = 'other'
                        bill.add_action(actor, act, date, type=status,
                                        status=status)
                    elif len(tds) == 6:
                        where, committee = actor.split(':')
                        where = 'lower' if where == 'HCOMMITTEE' else 'upper'
                        date = datetime.datetime.strptime(tds[3].text_content().strip(),
                                                          '%m/%d/%y')
                        vote = tds[4].text_content().strip()[1:-1]
                        if len(vote.split('-')) == 4:
                            yes, no, nv, exc = vote.split('-')
                        else:
                            yes, no, excused, absent, nv = vote.split('-')
                        motion = tds[5].text_content().strip()
                        # NOTE(review): yes/no are still strings, so this
                        # is a lexicographic comparison (e.g. '9' > '10')
                        # -- confirm intended before porting.
                        passed = True if yes > no else False
                        vote = Vote(where, date, motion, passed, int(yes),
                                    int(no), int(nv), committee=committee)
                        vote.add_source(tds[0].xpath('string(a/@href)').strip())
                        bill.add_vote(vote)
            elif action in ('HOUSE FIRST READ', 'HOUSE SECOND READ'):
                aType = 'other'
                if re.search('HOUSE FIRST', action):
                    aType = 'committee:referred'
                bill.add_action('lower', action, utils.get_date(rows[0][1]),
                                type=aType)
            elif action in ('SENATE FIRST READ', 'SENATE SECOND READ'):
                aType = 'other'
                if re.search('SECOND', action):
                    aType = 'committee:referred'
                bill.add_action('upper', action, utils.get_date(rows[0][1]),
                                type=aType)
            elif action in ('TRANSMIT TO HOUSE', 'TRANSMIT TO SENATE'):
                actor = 'lower' if re.match('HOUSE', action) else 'upper'
                house = True if actor == 'lower' else False
                date = utils.get_date(rows[0][1])
                bill.add_action(actor, action, date)
            elif re.match('COW ACTION \d', action):
                # Committee of the Whole actions.
                actor = 'lower' if house else 'upper'
                for row in rows[1:]:
                    date = utils.get_date(row[1])
                    bill.add_action(actor, action, date,
                                    motion=row[2].text_content().strip())
            elif action in ('HOUSE FINAL READ', 'SENATE FINAL READ',
                            'THIRD READ'):
                actor = 'lower' if house else 'upper'
                for row in rows[1:]:
                    if row[0].text_content().strip() == 'Vote Detail':
                        # 10-column rows have no "amended" field;
                        # 11-column rows include it.
                        if len(row.getchildren()) == 10:
                            detail, date, ayes, nays, nv, exc, emer, rfe, two_thirds, result = [
                                x.text_content().strip() for x in row
                            ]
                            # NOTE(review): stray debug statement
                            # (Python 2 print) left in the original.
                            print action_url
                            passed = True if result == 'PASSED' else False
                            motion = action
                            date = datetime.datetime.strptime(date, '%m/%d/%y') if date else ''
                            vote = Vote(actor, date, motion, passed,
                                        int(ayes), int(nays), int(nv),
                                        excused=int(exc), emergency=emer,
                                        rfe=rfe, two_thirds_vote=two_thirds,
                                        type="passage")
                            vote.add_source(row[0].xpath('string(a/@href)').strip())
                            bill.add_vote(vote)
                        elif len(row.getchildren()) == 11:
                            detail, date, ayes, nays, nv, exc, emer, amend, rfe, two_thirds, result = [
                                x.text_content().strip() for x in row
                            ]
                            passed = True if result == 'PASSED' else False
                            motion = action
                            date = datetime.datetime.strptime(date, '%m/%d/%y') if date else ''
                            vote = Vote(actor, date, motion, passed,
                                        int(ayes), int(nays), int(nv),
                                        excused=int(exc), emergency=emer,
                                        amended=amend, rfe=rfe,
                                        two_thirds_vote=two_thirds,
                                        type="passage")
                            vote.add_source(row[0].xpath('string(a/@href)').strip())
                            bill.add_vote(vote)
            elif action == 'TRANSMITTED TO':
                actor = 'lower' if house else 'upper'
                act = action + ": " + rows[0][1].text_content().strip()
                date = rows[0][2].text_content().strip()
                date = datetime.datetime.strptime(date, '%m/%d/%y')
                bill.add_action(actor, act, date, type='governor:received')
                # need action and chaptered, chaptered version if they exists
                act, date, chapter, version = '', '', '', ''
                for row in rows[1:]:
                    if row[0].text_content().strip() == 'ACTION:':
                        act = row[1].text_content().strip()
                        date = datetime.datetime.strptime(row[2].text_content().strip(),
                                                          '%m/%d/%y')
                    elif row[0].text_content().strip() == 'CHAPTER':
                        chapter = row[1].text_content().strip()
                    elif row[0].text_content().strip() == 'CHAPTERED VERSION':
                        # NOTE(review): ``text_content`` is missing its
                        # call parentheses here, so this raises
                        # AttributeError whenever a CHAPTERED VERSION
                        # row exists.  Flagged, left unchanged.
                        version = row[1].text_content.strip()
                if act:
                    action_type = 'governor:signed' if act == 'SIGNED' else 'governor:vetoed'
                    if chapter:
                        bill.add_action('governor', act, date,
                                        type=action_type, chapter=chapter,
                                        chaptered_version=version)
                    else:
                        bill.add_action('governor', act, date,
                                        type=action_type)
    self.save_bill(bill)
    self.log("saved: " + bill['bill_id'])
def parse_status(self, bill, url):
    """Parse a bill status page: record every action on *bill*, and for any
    action row that links to a plain-text roll call, scrape and attach the
    vote as well.
    """
    chamber = bill['chamber']
    # NOTE(review): session and bill_id are read but never used below.
    session = bill['session']
    bill_id = bill['bill_id']
    status = self.soup_parser(self.urlopen(url))
    bill.add_source(url)
    act_table = status.table

    # Get actions: skip the header row.
    for row in act_table.findAll('tr')[1:]:
        act_date = row.td.find(text=True)
        act_date = dt.datetime.strptime(act_date, "%m/%d/%Y")
        action = row.findAll('td')[1].find(text=True)

        # If not specified, assume action occurred
        # in originating house
        actor = chamber

        # Actions may carry an "Actor/Action text" prefix.
        split_action = action.split('/')
        if len(split_action) > 1:
            actor = split_action[0]

            if actor == 'House':
                actor = 'lower'
            elif actor == 'Senate':
                actor = 'upper'
            elif actor == 'LFA':
                actor = 'Office of the Legislative Fiscal Analyst'

            action = '/'.join(split_action[1:]).strip()

        if action == 'Governor Signed':
            actor = 'Governor'

        bill.add_action(actor, action, act_date)

        # Check if this action is a vote: vote rows carry an extra link.
        links = row.findAll('a')
        if len(links) > 1:
            vote_url = links[-1]['href']

            # Committee votes are of a different format that
            # we don't handle yet
            if not vote_url.endswith('txt'):
                continue

            # Roll-call file is relative to the status page's directory.
            vote_url = '/'.join(url.split('/')[:-1]) + '/' + vote_url
            vote_page = self.urlopen(vote_url)

            vote_re = re.compile('YEAS -?\s?(\d+)(.*)NAYS -?\s?(\d+)'
                                 '(.*)ABSENT( OR NOT VOTING)? -?\s?'
                                 '(\d+)(.*)', re.MULTILINE | re.DOTALL)
            # NOTE(review): if the page doesn't match, .search returns None
            # and the group() calls below raise AttributeError — confirm
            # every linked .txt page matches this layout.
            match = vote_re.search(vote_page)

            # NOTE(review): these are regex-group strings, not ints; they
            # are passed to Vote() as-is (only compared after int() below).
            yes_count = match.group(1)
            no_count = match.group(3)
            other_count = match.group(6)

            if int(yes_count) > int(no_count):
                passed = True
            else:
                passed = False

            # Floor votes keep the chamber; anything else (e.g. LFA) is
            # recorded as a location instead.
            if actor == 'upper' or actor == 'lower':
                vote_chamber = actor
                vote_location = ''
            else:
                vote_chamber = ''
                vote_location = actor

            vote = Vote(vote_chamber, act_date, action, passed,
                        yes_count, no_count, other_count,
                        location=vote_location)
            vote.add_source(vote_url)

            # Names are separated by runs of 2+ spaces in the text file.
            yes_votes = re.split('\s{2,}', match.group(2).strip())
            no_votes = re.split('\s{2,}', match.group(4).strip())
            other_votes = re.split('\s{2,}', match.group(7).strip())

            # Python 2: map() is eager, so these record each name.
            map(vote.yes, yes_votes)
            map(vote.no, no_votes)
            map(vote.other, other_votes)

            bill.add_vote(vote)
def scrape_votes(self, chamber, url, bill, date, **kwargs):
    """
    Scrapes the votes from a vote detail page with the legislator's names.

    Expected keyword arguments:
        type      -- vote type string (required; popped)
        motion    -- motion text for the Vote
        passed    -- 'PASSED'/'FAILED' result string from the listing page
        AMEND / EMER / '2/3 VOTE' -- lxml cells whose stripped text, when
                     present, is stored as extra attributes on the Vote
        committee -- committee identifier, resolved via utils
    """
    o_args = {}
    passed = ''  # sentinel: '' means "derive pass/fail from the counts below"
    v_type = kwargs.pop('type')
    if 'passed' in kwargs:
        passed = {'PASSED': True, 'FAILED': False}[kwargs.pop('passed')]
    if 'AMEND' in kwargs:
        o_args['amended'] = kwargs.pop('AMEND').text_content().strip()
    if 'motion' in kwargs:
        motion = kwargs.pop('motion')
    if 'EMER' in kwargs and kwargs['EMER'].text_content().strip():
        o_args['EMER'] = kwargs.pop('EMER').text_content().strip()
    if '2/3 VOTE' in kwargs and kwargs['2/3 VOTE'].text_content().strip():
        o_args['2/3 VOTE'] = kwargs.pop('2/3 VOTE').text_content().strip()
    if 'committee' in kwargs:
        o_args['committee'] = utils.get_committee_name(
            kwargs.pop('committee'), chamber)

    with self.urlopen(url) as vote_page:
        root = html.fromstring(vote_page)
        vote_table = root.xpath(
            '/html/body/div/table/tr[3]/td[4]/table/tr/td/table/tr/td/table')[0]
        vote_count = vote_table.xpath(
            'following-sibling::p/following-sibling::text()')
        vote_string = vote_count[0].replace(u'\xa0', '').strip()
        v_count = re.compile(r'\b[A-Z]*\s*[A-z]*:\s\d*')
        v_list = v_count.findall(vote_string)
        o_count = 0
        for x in v_list:
            k, v = x.split(':')
            # make NOT VOTING not_voting
            k = k.strip().replace(' ', '_').lower()
            v = int(v.strip())
            if k == 'ayes':
                yes_count = v
            elif k == 'nays':
                no_count = v
            else:
                # Anything besides ayes/nays counts toward "other".
                o_args[str(k)] = v
                o_count += v
        if passed == '':
            passed = yes_count > no_count
        if 'committee' not in o_args:
            # Floor votes require a majority of the full chamber membership
            # (a higher bar with an emergency or two-thirds clause), not
            # merely a majority of those voting.
            if chamber == 'upper' and passed:
                if 'EMER' in o_args or '2/3 VOTE' in o_args:
                    passed = yes_count > 20
                else:
                    passed = yes_count > 16
            elif chamber == 'lower' and passed:
                if 'EMER' in o_args or '2/3 VOTE' in o_args:
                    passed = yes_count > 40
                else:
                    passed = yes_count > 31

        vote = Vote(chamber, date, motion, passed, yes_count, no_count,
                    o_count, type=v_type, **o_args)
        vote.add_source(url)
        # grab all the tables descendant tds
        tds = vote_table.xpath('descendant::td')
        # pair 'em up: (member-name cell, vote-letter cell)
        matched = [tds[y:y + 2] for y in range(0, len(tds), 2)]
        for name, v in matched:
            v = v.text_content().strip()
            name = name.text_content().strip()
            if name == 'Member Name':
                continue
            if v == 'Y':
                vote.yes(name)
            elif v == 'N':
                vote.no(name)
            else:
                vote.other(name)
        bill.add_vote(vote)
def get_vote(self, bill, url):
    """Scrape a single roll-call page and attach the resulting Vote to
    *bill*.  The first time a tie-breaking Senate vote is seen, also
    records the Lieutenant Governor as a Person.
    """
    url = 'http://www.ncga.state.nc.us' + url + '&bPrintable=true'
    # Chamber is encoded in the sChamber query parameter (H or S).
    chamber = {'H': 'lower', 'S': 'upper'}[
        re.findall('sChamber=(\w)', url)[0]]
    data = self.urlopen(url)
    soup = self.soup_parser(data)

    motion = soup.findAll('a', href=re.compile('BillLookUp\.pl'))[0] \
        .findParents('tr', limit=1)[0].findAll('td')[1] \
        .font.contents[-1]

    vote_time = soup.findAll('b', text='Time:')[0].next.strip()
    vote_time = dt.datetime.strptime(vote_time, '%b %d %Y %I:%M%p')

    vote_mess = soup.findAll('td', text=re.compile('Total Votes:'))[0]
    (yeas, noes, nots, absent, excused) = map(lambda x: int(x), re.findall(
        'Ayes: (\d+)\s+Noes: (\d+)\s+Not: (\d+)\s+Exc. '
        'Absent: (\d+)\s+Exc. Vote: (\d+)', vote_mess, re.U)[0])

    # chamber, date, motion, passed, yes_count, no_count, other_count
    v = Vote(chamber, vote_time, motion, (yeas > noes),
             yeas, noes, nots + absent + excused)

    # eh, it's easier to just get table[2] for this..
    vote_table = soup.findAll('table')[2]

    for row in vote_table.findAll('tr'):
        # Skip the party header row.
        if 'Democrat' in self.flatten(row):
            continue

        cells = row.findAll('td')
        if len(cells) == 1:
            # Single-cell row: a tie-breaking vote by the Lt. Governor.
            # I can't find any examples of ties in the House,
            # nor information on who would break them.
            if not self.lt_gov and chamber == 'upper':
                full_name = soup.findAll(
                    'td', text=re.compile('Lieutenant Governor'))[0] \
                    .parent.findAll('span')[0].contents[0]
                (first_name, last_name, middle_name, suffix) = split_name(
                    full_name)
                self.lt_gov = Person(full_name, first_name=first_name,
                                     last_name=last_name,
                                     middle_name=middle_name,
                                     suffix=suffix)
                self.lt_gov.add_role('Lieutenant Governor',
                                     bill['session'])
                self.save_person(self.lt_gov)

            # NOTE(review): full_name is only bound inside the branch
            # above, so if self.lt_gov is already set (any tie after the
            # first) the v.yes/v.no calls below raise NameError — confirm
            # and fix by extracting the name unconditionally.
            if 'VOTES YES' in self.flatten(cells[0]):
                v['passed'] = True
                v.yes(full_name)
            else:
                v['passed'] = False
                v.no(full_name)
            continue
        elif len(cells) == 2:
            # One combined member list.
            vote_type, a = cells
            bunch = [self.flatten(a)]
        elif len(cells) == 3:
            # Separate Democrat / Republican member lists.
            vote_type, d, r = cells
            bunch = [self.flatten(d), self.flatten(r)]
        else:
            continue

        # why doesn't .string work? ... bleh.
        vote_type = vote_type.font.b.contents[0]
        if 'Ayes' in vote_type:
            adder = v.yes
        elif 'Noes' in vote_type:
            adder = v.no
        else:
            adder = v.other

        for party in bunch:
            # Text after the colon is a semicolon-separated name list.
            party = map(lambda x: x.replace(
                ' (SPEAKER)', ''), party[
                (party.index(':') + 1):].split(';'))
            if party[0] == 'None':
                party = []
            for x in party:
                adder(x)

    v.add_source(url)
    bill.add_vote(v)
def parse_vote_details(self, url):
    """
    Grab the details of a specific vote, such as how each legislator voted.
    """
    def find_vote(letter):
        # Match vote-marker spans by their inner text.
        return vote_page.findAll('span', {'class': 'font8text'}, text=letter)

    with self.urlopen(url) as vote_page:
        vote_page = BeautifulSoup(vote_page)

        header = vote_page.find('div', {'class': 'subHdrGraphic'})
        chamber = 'upper' if 'Senate' in header.string else 'lower'

        # we'll use the link back to the bill as a base to
        # get the motion/date
        linkback = vote_page.find(
            'a', href=re.compile('billinfo')).parent.parent
        date = dt.datetime.strptime(linkback.find('div').string,
                                    "%A, %B %d, %Y")

        motion = linkback.findNextSibling('div')
        if motion.a:
            motion = "%s %s" % (motion.a.string,
                                motion.contents[-1].string.strip())
        elif motion.span:
            motion = "%s %s" % (motion.span.string.strip(),
                                motion.contents[-1].string.strip())
        else:
            motion = motion.string.strip().replace(' ', '')

        def tally(label):
            # Read the count printed immediately after a column label.
            return int(vote_page.find('div', text=label).next.string)

        yes_count = tally('YEAS')
        no_count = tally('NAYS')
        other_count = tally('LVE') + tally('N/V')
        passed = yes_count > no_count

        vote = Vote(chamber, date, motion, passed,
                    yes_count, no_count, other_count)
        vote.add_source(url)

        # find the votes by the inner text. because background colors lie.
        buckets = (
            (vote.yes, find_vote('Y')),
            (vote.no, find_vote('N')),
            (vote.other, find_vote('E') + find_vote('X')),
        )
        for record, markers in buckets:
            for marker in markers:
                record(marker.parent.findNextSibling('span').string)

        # Sanity-check tallied names against the page's own counts.
        if len(vote['yes_votes']) != yes_count:
            raise ScrapeError('wrong yes count %d/%d'
                              % (len(vote['yes_votes']), yes_count))
        if len(vote['no_votes']) != no_count:
            raise ScrapeError('wrong no count %d/%d'
                              % (len(vote['no_votes']), no_count))
        if len(vote['other_votes']) != other_count:
            raise ScrapeError('wrong other count %d/%d'
                              % (len(vote['other_votes']), other_count))

        return vote
def get_vote(self, bill, url):
    """Scrape a single roll-call page and attach the resulting Vote to
    *bill*.  The first time a tie-breaking Senate vote is seen, also
    records the Lieutenant Governor as a Person.
    """
    url = "http://www.ncga.state.nc.us" + url + "&bPrintable=true"
    # Chamber is encoded in the sChamber query parameter (H or S).
    chamber = {"H": "lower", "S": "upper"}[re.findall("sChamber=(\w)", url)[0]]
    data = self.urlopen(url)
    soup = self.soup_parser(data)

    motion = (
        soup.findAll("a", href=re.compile("BillLookUp\.pl"))[0]
        .findParents("tr", limit=1)[0]
        .findAll("td")[1]
        .font.contents[-1]
    )

    vote_time = soup.findAll("b", text="Time:")[0].next.strip()
    vote_time = dt.datetime.strptime(vote_time, "%b %d %Y %I:%M%p")

    vote_mess = soup.findAll("td", text=re.compile("Total Votes:"))[0]
    (yeas, noes, nots, absent, excused) = map(
        lambda x: int(x),
        re.findall(
            "Ayes: (\d+)\s+Noes: (\d+)\s+Not: (\d+)\s+Exc. "
            "Absent: (\d+)\s+Exc. Vote: (\d+)",
            vote_mess,
            re.U
        )[0],
    )

    # chamber, date, motion, passed, yes_count, no_count, other_count
    v = Vote(chamber, vote_time, motion, (yeas > noes), yeas, noes, nots + absent + excused)

    # eh, it's easier to just get table[2] for this..
    vote_table = soup.findAll("table")[2]

    for row in vote_table.findAll("tr"):
        # Skip the party header row.
        if "Democrat" in self.flatten(row):
            continue

        cells = row.findAll("td")
        if len(cells) == 1:
            # Single-cell row: a tie-breaking vote by the Lt. Governor.
            # I can't find any examples of ties in the House,
            # nor information on who would break them.
            if not self.lt_gov and chamber == "upper":
                full_name = (
                    soup.findAll("td", text=re.compile("Lieutenant Governor"))[0]
                    .parent.findAll("span")[0]
                    .contents[0]
                )
                (first_name, last_name, middle_name, suffix) = split_name(full_name)
                self.lt_gov = Person(
                    full_name, first_name=first_name, last_name=last_name, middle_name=middle_name, suffix=suffix
                )
                self.lt_gov.add_role("Lieutenant Governor", bill["session"])
                self.save_person(self.lt_gov)

            # NOTE(review): full_name is only bound inside the branch above,
            # so if self.lt_gov is already set (any tie after the first) the
            # v.yes/v.no calls below raise NameError — confirm and fix by
            # extracting the name unconditionally.
            if "VOTES YES" in self.flatten(cells[0]):
                v["passed"] = True
                v.yes(full_name)
            else:
                v["passed"] = False
                v.no(full_name)
            continue
        elif len(cells) == 2:
            # One combined member list.
            vote_type, a = cells
            bunch = [self.flatten(a)]
        elif len(cells) == 3:
            # Separate Democrat / Republican member lists.
            vote_type, d, r = cells
            bunch = [self.flatten(d), self.flatten(r)]
        else:
            continue

        # why doesn't .string work? ... bleh.
        vote_type = vote_type.font.b.contents[0]
        if "Ayes" in vote_type:
            adder = v.yes
        elif "Noes" in vote_type:
            adder = v.no
        else:
            adder = v.other

        for party in bunch:
            # Text after the colon is a semicolon-separated name list.
            party = map(lambda x: x.replace(" (SPEAKER)", ""), party[(party.index(":") + 1) :].split(";"))
            if party[0] == "None":
                party = []
            for x in party:
                adder(x)

    v.add_source(url)
    bill.add_vote(v)