def apply_votes(self, bill):
    """Parse every vote listed at the bill's status URL and attach it to the bill.

    ``bill`` is expected to carry a ``status_url`` entry. Each item produced
    by ``votes.all_votes_for_url`` is unpacked as
    ``(chamber, vote_desc, pdf_url, these_votes)``; ``these_votes`` maps each
    voter to a vote code, where 'Y' counts as yes, 'N' as no and anything
    else is recorded as "other".

    Votes whose description contains no "-" separator are skipped with a
    warning, because no date can be extracted from them.
    """
    bill_votes = votes.all_votes_for_url(self, bill['status_url'])
    for (chamber, vote_desc, pdf_url, these_votes) in bill_votes:
        # The date is the text after the last "-" in the description.
        # str.split() always returns at least one element, so a missing
        # separator must be detected explicitly; indexing [-1] never raises.
        if "-" not in vote_desc:
            self.warning("[%s] Couldn't get date out of [%s]" % (bill['bill_id'], vote_desc))
            continue
        date = vote_desc.split("-")[-1]

        yes_votes = []
        no_votes = []
        other_votes = []
        for voter, vote_code in these_votes.iteritems():
            if vote_code == 'Y':
                yes_votes.append(voter)
            elif vote_code == 'N':
                no_votes.append(voter)
            else:
                other_votes.append(voter)

        # not necessarily correct, but not sure where else to get it.
        # maybe from pdf
        passed = len(yes_votes) > len(no_votes)

        vote = Vote(standardize_chamber(chamber), date, vote_desc, passed,
                    len(yes_votes), len(no_votes), len(other_votes),
                    pdf_url=pdf_url)
        for voter in yes_votes:
            vote.yes(voter)
        for voter in no_votes:
            vote.no(voter)
        for voter in other_votes:
            vote.other(voter)
        bill.add_vote(vote)
def scrape_vote(self, bill, date, url):
    """Scrape one roll-call page and add the resulting vote to *bill*.

    The page's ``hdVote`` heading is split on ", ": the second piece names
    the chamber (optionally followed by a committee name) and the remaining
    pieces form the motion. Totals come from the ``tdAyes``/``tdNays``/
    ``tdExcused``/``tdAbsent`` cells; individual votes come from the
    ``tblVotes`` table, where each vote cell follows the member-name cell.

    Raises ScrapeError when the heading names neither House nor Senate.
    """
    with self.urlopen(url) as page:
        page = lxml.html.fromstring(page)

        header = page.xpath("string(//h4[contains(@id, 'hdVote')])")
        header_parts = header.split(', ')
        location = header_parts[1]

        if location.startswith('House'):
            chamber = 'lower'
        elif location.startswith('Senate'):
            chamber = 'upper'
        else:
            # Report the text that failed to match; ``chamber`` is not
            # bound on this path.
            raise ScrapeError("Bad chamber: %s" % location)

        # Whatever follows the chamber word is a committee name, except for
        # the tail of "House of Representatives".
        committee = ' '.join(location.split(' ')[1:]).strip()
        if not committee or committee.startswith('of Representatives'):
            committee = None

        motion = ', '.join(header_parts[2:]).strip()

        yes_count = int(
            page.xpath("string(//td[contains(@id, 'tdAyes')])"))
        no_count = int(
            page.xpath("string(//td[contains(@id, 'tdNays')])"))
        excused_count = int(
            page.xpath("string(//td[contains(@id, 'tdExcused')])"))
        absent_count = int(
            page.xpath("string(//td[contains(@id, 'tdAbsent')])"))
        other_count = excused_count + absent_count

        passed = yes_count > no_count

        if motion.startswith('Do Pass'):
            vote_type = 'passage'
        elif motion == 'Concurred in amendments':
            vote_type = 'amendment'
        elif motion == 'Veto override':
            vote_type = 'veto_override'
        else:
            vote_type = 'other'

        vote = Vote(chamber, date, motion, passed, yes_count, no_count,
                    other_count)
        vote['type'] = vote_type
        if committee:
            vote['committee'] = committee
        vote.add_source(url)

        for td in page.xpath("//table[contains(@id, 'tblVotes')]/tr/td"):
            # The member's name is in the cell just before the vote cell.
            if td.text == 'Yea':
                vote.yes(td.getprevious().text.strip())
            elif td.text == 'Nay':
                vote.no(td.getprevious().text.strip())
            elif td.text in ('Excused', 'Absent'):
                vote.other(td.getprevious().text.strip())

        bill.add_vote(vote)
def parse_vote_new(self, bill, chamber, url):
    """Parse a table-layout roll-call page and add the vote to *bill*.

    The second row of the page's first table supplies, in cell order: the
    date (mm/dd/yyyy), the motion, the yes, no and absent counts, and the
    result text ('Pass' means the vote passed). From the fourth row on, every
    row with exactly two cells holds a member name and how that member voted.
    """
    vote_page = BeautifulSoup(self.urlopen(url))
    table = vote_page.table
    rows = table.findAll('tr')

    info_row = rows[1]
    summary = info_row.findAll('td')

    date = dt.datetime.strptime(info_row.td.contents[0], '%m/%d/%Y')
    motion = summary[1].contents[0]
    yes_count = int(summary[2].contents[0])
    no_count = int(summary[3].contents[0])
    abs_count = int(summary[4].contents[0])
    passed = summary[5].contents[0] == 'Pass'

    vote = Vote(chamber, date, motion, passed,
                yes_count, no_count, abs_count)
    vote.add_source(url)

    for tr in rows[3:]:
        cells = tr.findAll('td')
        if len(cells) != 2:
            continue

        # Keep only the text before " of" in the name cell.
        name = tr.td.contents[0].split(' of')[0]
        cast = cells[1].contents[0]

        if cast.startswith('Yea'):
            vote.yes(name)
        elif cast.startswith('Nay'):
            vote.no(name)
        else:
            vote.other(name)

    bill.add_vote(vote)
def scrape(self, chamber, session):
    """Build and save a single hard-coded sample bill for *chamber*.

    The bill is 'SB 1' for the upper chamber and 'HB 1' otherwise. It gets a
    fixed source, version, document, two sponsors, two votes and three
    actions, and is then saved with ``self.save_bill``.
    """
    self.validate_session(session)

    if chamber == 'upper':
        other_chamber, bill_id = 'lower', 'SB 1'
    else:
        other_chamber, bill_id = 'upper', 'HB 1'

    sample = Bill(session, chamber, bill_id, 'A super bill')
    sample.add_source('http://example.com/')
    sample.add_version('As Introduced', 'http://example.com/SB1.html')
    sample.add_document('Google', 'http://google.com')
    sample.add_sponsor('primary', 'Bob Smith')
    sample.add_sponsor('secondary', 'Johnson, Sally')

    date_format = '%m/%d/%Y'
    first_day = datetime.datetime.strptime('1/29/2010', date_format)
    second_day = datetime.datetime.strptime('1/30/2010', date_format)

    upper_vote = Vote('upper', first_day, 'Final passage', True, 2, 0, 0)
    for voter in ('Smith', 'Johnson'):
        upper_vote.yes(voter)

    lower_vote = Vote('lower', second_day, 'Final passage', False, 0, 1, 1)
    lower_vote.no('Bob Smith')
    lower_vote.other('S. Johnson')

    sample.add_vote(upper_vote)
    sample.add_vote(lower_vote)

    sample.add_action(chamber, 'introduced', first_day)
    sample.add_action(chamber, 'read first time', second_day)
    sample.add_action(other_chamber, 'introduced', second_day)

    self.save_bill(sample)
def scrape_vote(self, bill, name, url):
    """Parse a roll-call PDF and add the resulting vote to *bill*.

    *name* must look like "<Senate|House> Vote on <something>,<motion>";
    any other name is silently ignored. The PDF at *url* is written to a
    temporary file, converted with ``pdf_to_lxml`` and read line by line:
    a 'YEAS', 'NAYS' or 'ABSENT' line opens a section, a 'Total--<n>' line
    sets that section's count, and every other non-empty line is recorded
    as a voter in the current section.
    """
    match = re.match('^(Senate|House) Vote on [^,]*,(.*)$', name)
    if not match:
        return

    chamber = {'Senate': 'upper', 'House': 'lower'}[match.group(1)]
    motion = match.group(2).strip()

    if motion.startswith('FINAL PASSAGE'):
        motion_type = 'passage'
    elif motion.startswith('AMENDMENT'):
        motion_type = 'amendment'
    elif 'ON 3RD READING' in motion or 'ON 3RD READINT' in motion:
        # 'READINT' is kept in case the upstream text really is misspelled;
        # NOTE(review): confirm which spelling the source site uses.
        motion_type = 'reading:3'
    else:
        motion_type = 'other'

    vote = Vote(chamber, None, motion, None,
                None, None, None)
    vote['type'] = motion_type
    vote.add_source(url)

    with self.urlopen(url) as text:
        (fd, temp_path) = tempfile.mkstemp()
        try:
            with os.fdopen(fd, 'wb') as w:
                w.write(text)
            html = pdf_to_lxml(temp_path)
        finally:
            # mkstemp leaves deletion to the caller; clean up even when
            # the conversion fails.
            os.remove(temp_path)

        section = None
        total_re = re.compile(r'^Total--(\d+)$')
        body = html.xpath('string(/html/body)')

        for line in body.replace(u'\xa0', '\n').split('\n'):
            line = line.replace(' ', '').strip()
            if not line:
                continue

            if line in ('YEAS', 'NAYS', 'ABSENT'):
                section = {'YEAS': 'yes', 'NAYS': 'no',
                           'ABSENT': 'other'}[line]
            elif section:
                total = total_re.match(line)
                if total:
                    vote['%s_count' % section] = int(total.group(1))
                elif section == 'yes':
                    vote.yes(line)
                elif section == 'no':
                    vote.no(line)
                elif section == 'other':
                    vote.other(line)

        # The PDFs oddly don't say whether a vote passed or failed.
        # Hopefully passage just requires yes_votes > not_yes_votes
        if vote['yes_count'] > (vote['no_count'] + vote['other_count']):
            vote['passed'] = True
        else:
            vote['passed'] = False

        bill.add_vote(vote)
def scrape_vote(self, bill, name, url):
    """Parse a roll-call PDF and add the resulting vote to *bill*.

    *name* must look like "<Senate|House> Vote on <something>,<motion>";
    any other name is silently ignored. The PDF at *url* is written to a
    temporary file, converted with ``pdf_to_lxml`` and read line by line:
    a "YEAS", "NAYS" or "ABSENT" line opens a section, a "Total--<n>" line
    sets that section's count, and every other non-empty line is recorded
    as a voter in the current section.
    """
    match = re.match("^(Senate|House) Vote on [^,]*,(.*)$", name)
    if not match:
        return

    chamber = {"Senate": "upper", "House": "lower"}[match.group(1)]
    motion = match.group(2).strip()

    if motion.startswith("FINAL PASSAGE"):
        motion_type = "passage"
    elif motion.startswith("AMENDMENT"):
        motion_type = "amendment"
    elif "ON 3RD READING" in motion or "ON 3RD READINT" in motion:
        # "READINT" is kept in case the upstream text really is misspelled;
        # NOTE(review): confirm which spelling the source site uses.
        motion_type = "reading:3"
    else:
        motion_type = "other"

    vote = Vote(chamber, None, motion, None, None, None, None)
    vote["type"] = motion_type
    vote.add_source(url)

    with self.urlopen(url) as text:
        (fd, temp_path) = tempfile.mkstemp()
        try:
            with os.fdopen(fd, "wb") as w:
                w.write(text)
            html = pdf_to_lxml(temp_path)
        finally:
            # mkstemp leaves deletion to the caller; clean up even when
            # the conversion fails.
            os.remove(temp_path)

        section = None
        total_re = re.compile(r"^Total--(\d+)$")
        body = html.xpath("string(/html/body)")

        for line in body.replace(u"\xa0", "\n").split("\n"):
            line = line.replace(" ", "").strip()
            if not line:
                continue

            if line in ("YEAS", "NAYS", "ABSENT"):
                section = {"YEAS": "yes", "NAYS": "no",
                           "ABSENT": "other"}[line]
            elif section:
                total = total_re.match(line)
                if total:
                    vote["%s_count" % section] = int(total.group(1))
                elif section == "yes":
                    vote.yes(line)
                elif section == "no":
                    vote.no(line)
                elif section == "other":
                    vote.other(line)

        # The PDFs oddly don't say whether a vote passed or failed.
        # Hopefully passage just requires yes_votes > not_yes_votes
        if vote["yes_count"] > (vote["no_count"] + vote["other_count"]):
            vote["passed"] = True
        else:
            vote["passed"] = False

        bill.add_vote(vote)
def record_votes(root):
    """Yield a Vote for every record vote found under *root*.

    A record vote is located by a ``<p>`` whose text starts with
    "Yeas \\u2014"; the element just before it must contain the sentence
    describing the bill, the outcome, the record number and the totals.
    Paragraphs whose preceding text does not match, or whose bill id starts
    with neither H/CSHB nor S/CSSB, are skipped. The paragraphs after the
    "Yeas" paragraph supply the nay names (if it starts with "Nays") and any
    number of "Present"/"Absent" name lists, which are recorded as "other".
    """
    for yeas_el in root.xpath(u'//p[starts-with(., "Yeas \u2014")]'):
        previous = yeas_el.getprevious()
        if previous is None:
            continue

        text = ''.join(previous.itertext())
        # str.replace returns a new string; keep the result so that captured
        # groups (e.g. the bill id) do not contain embedded newlines.
        text = text.replace('\n', ' ')

        m = re.search(r'(?P<bill_id>\w+\W+\d+)(,?\W+as\W+amended,?)?\W+was\W+'
                      r'(?P<type>adopted|passed'
                      r'(\W+to\W+(?P<to>engrossment|third\W+reading))?)\W+'
                      r'by\W+\(Record\W+(?P<record>\d+)\):\W+'
                      r'(?P<yeas>\d+)\W+Yeas,\W+(?P<nays>\d+)\W+Nays,\W+'
                      r'(?P<present>\d+)\W+Present',
                      text)
        if not m:
            continue

        yes_count = int(m.group('yeas'))
        no_count = int(m.group('nays'))
        other_count = int(m.group('present'))

        bill_id = m.group('bill_id')
        if bill_id.startswith('H') or bill_id.startswith('CSHB'):
            bill_chamber = 'lower'
        elif bill_id.startswith('S') or bill_id.startswith('CSSB'):
            bill_chamber = 'upper'
        else:
            continue

        motion = get_motion(m)
        vote_type = get_type(motion)

        vote = Vote(None, None, motion, True,
                    yes_count, no_count, other_count)
        vote['bill_id'] = bill_id
        vote['bill_chamber'] = bill_chamber
        vote['session'] = '81'
        vote['method'] = 'record'
        vote['record'] = m.group('record')
        vote['filename'] = m.group('record')
        vote['type'] = vote_type

        for name in names(yeas_el):
            vote.yes(name)

        # Walk the following siblings; getnext() returns None at the end of
        # the parent, so every step is guarded.
        sibling = yeas_el.getnext()
        if (sibling is not None and sibling.text
                and sibling.text.startswith('Nays')):
            for name in names(sibling):
                vote.no(name)
            sibling = sibling.getnext()

        while (sibling is not None and sibling.text
               and re.match(r'Present|Absent', sibling.text)):
            for name in names(sibling):
                vote.other(name)
            sibling = sibling.getnext()

        # The parsed "Present" total is replaced by the number of names
        # actually recorded as other.
        vote['other_count'] = len(vote['other_votes'])
        yield vote
def scrape_votes(self, bill, bill_type, number, session):
    """Scrape the votes page for one bill and add each vote to *bill*.

    Every link whose href contains 'JournalText' marks one vote: the link
    text is the date (mm/dd/yyyy), the second cell of the link's row holds
    "<chamber> - <motion>", and the following row holds the Yea and Nay name
    tables. Raises ScrapeError for a chamber other than House or Senate.
    """
    vote_url = ('http://www.legislature.state.oh.us/votes.cfm?ID=' +
                '_'.join([session, bill_type, str(number)]))

    with self.urlopen(vote_url) as page:
        page = lxml.etree.fromstring(page, lxml.etree.HTMLParser())

        for jlink in page.xpath("//a[contains(@href, 'JournalText')]"):
            date = datetime.datetime.strptime(jlink.text,
                                              "%m/%d/%Y").date()

            details = jlink.xpath("string(../../../td[2])")
            detail_parts = details.split(" - ")

            chamber_name = detail_parts[0]
            chamber = {'House': 'lower', 'Senate': 'upper'}.get(chamber_name)
            if chamber is None:
                raise ScrapeError("Bad chamber: %s" % chamber_name)

            motion = detail_parts[1].split("\n")[0].strip()

            # The row after the link's row carries the name tables.
            vote_row = jlink.xpath("../../..")[0].getnext()

            def names_under(div_path):
                """Return the non-empty cell texts of the matched div."""
                div = vote_row.xpath(div_path)[0]
                cell_texts = [td.xpath("string()")
                              for td in div.xpath("table/tr/td")]
                return [text for text in cell_texts if text]

            yeas = names_under("td/font/div[contains(@id, 'Yea')]")
            nays = names_under("td/font/div[contains(@id, 'Nay')]")

            yes_count = len(yeas)
            no_count = len(nays)

            vote = Vote(chamber, date, motion, yes_count > no_count,
                        yes_count, no_count, 0)

            for voter in yeas:
                vote.yes(voter)
            for voter in nays:
                vote.no(voter)

            bill.add_vote(vote)
def get_text_vote_results(self, bill, vote_date, motion_name, vote_data):
    """Build a Vote from a plain-text vote report.

    The text is read line by line. A "Motion:" line containing DO CONCUR,
    DO PASS or DO ADOPT marks the vote as passed. "Yeas:"/"Ayes:" and
    "Nays:"/"Noes" lines open the yes and no sections; a blank line closes
    the current section. Inside a section, a "Total <n>" line sets that
    section's count and every other line contributes comma-separated names.
    Outside a section, "Total <n>" lines are added to the other count.

    Returns the Vote; it is not added to *bill* here.
    """
    vote = Vote(bill['chamber'], vote_date, motion_name, None, 0, 0, 0)

    def consume(text, count_key, record):
        """Apply one line of an open section to the vote."""
        if text.startswith("Total "):
            vote[count_key] = int(text.split()[1].strip())
            text = ""
        colon_at = text.find(":")
        if colon_at != -1:
            # Drop the "Yeas:"-style label in front of the names.
            text = text[colon_at + 1:]
        for name in text.split(","):
            name = name.strip()
            if name != '':
                if name[-1] == '.':
                    name = name[0:-1]
                record(name)

    # None, 'yes' or 'no': which name list is currently being read.
    section = None

    for line in vote_data.splitlines():
        if line.startswith("Motion:"):
            line = line.strip().upper()
            if any(phrase in line
                   for phrase in ('DO CONCUR', 'DO PASS', 'DO ADOPT')):
                vote['passed'] = True
        elif line.startswith("Yeas:") or line.startswith("Ayes:"):
            section = 'yes'
        elif line.startswith("Nays:") or line.startswith("Noes"):
            section = 'no'
        elif line.startswith("Total "):
            if section is None:
                vote['other_count'] += int(line.split()[1].strip())
        elif line == '':
            section = None

        if section == 'yes':
            consume(line, 'yes_count', vote.yes)
        elif section == 'no':
            consume(line, 'no_count', vote.no)

    return vote
def scrape_old_vote(self, url):
    """Parse an old-layout vote page and return the resulting Vote.

    The first ``<h3>`` is split on ", ": the second piece names the chamber
    (anything not starting with 'House' is treated as upper) followed by an
    optional location, and the remaining pieces form the motion. Counts are
    read from cells 1, 3, 5 and 7 of the page's second table (an empty cell
    counts as 0); member votes are read from the page's first table.
    """
    vote_page = self.soup_parser(self.urlopen(url))

    header = vote_page.h3.contents[0]
    header_parts = header.split(', ')
    chamber_name = header_parts[1]

    chamber = 'lower' if chamber_name.startswith('House') else 'upper'

    location = ' '.join(chamber_name.split(' ')[1:])
    if location.startswith('of Representatives'):
        location = ''

    motion = ', '.join(header_parts[2:])

    def get_count(cell):
        """Return the cell's integer content, or 0 for an empty cell."""
        if cell.contents:
            return int(cell.contents[0])
        return 0

    result_cells = vote_page.findAll('table')[1].findAll('td')
    yes_count = get_count(result_cells[1])
    no_count = get_count(result_cells[3])
    excused_count = get_count(result_cells[5])
    absent_count = get_count(result_cells[7])
    other_count = excused_count + absent_count

    passed = yes_count > no_count

    vote = Vote(chamber, None, motion, passed,
                yes_count, no_count,
                other_count, excused_count=excused_count,
                absent_count=absent_count,
                location=location)
    vote.add_source(url)

    for td in vote_page.table.findAll('td'):
        cast = td.contents[0]
        # The member's name is the content of the element just before.
        if cast == 'Yea':
            vote.yes(td.findPrevious().contents[0])
        elif cast == 'Nay':
            vote.no(td.findPrevious().contents[0])
        elif cast in ('Excused', 'Absent'):
            vote.other(td.findPrevious().contents[0])

    return vote
def scrape_votes(self, bill, sponsor, link):
    """Scrape the vote listing at *link*, add each vote to *bill*, return *bill*.

    The text of the ``lblVoteData`` span is split on
    "<bill_id> by <sponsor> - "; each resulting chunk is one vote, itself
    split into fields on a run of ten non-breaking spaces. The first field
    is the motion (which may contain an mm/dd/yyyy date); later fields carry
    "Ayes...<n>" / "Noes...<n>" totals and "voting aye were:" /
    "voting no were:" name lists.

    When both totals are present the outcome is derived from them;
    otherwise both counts fall back to 0 and the outcome is taken from the
    words 'Passed' (in the motion) or 'Adopted' (in the second field).
    """
    date_regex = re.compile(r'(\d+/\d+/\d+)')
    count_regex = re.compile(r'\d+$')
    aye_regex = re.compile(r'^.+voting aye were: (.+) -')
    no_regex = re.compile(r'^.+voting no were: (.+) -')

    with self.urlopen(link) as page:
        page = lxml.html.fromstring(page)
        raw_vote_data = page.xpath("//span[@id='lblVoteData']")[0].text_content()
        raw_vote_data = raw_vote_data.strip().split(
            '%s by %s - ' % (bill['bill_id'], sponsor))[1:]

        for raw_vote in raw_vote_data:
            raw_vote = raw_vote.split(u'\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0')
            motion = raw_vote[0]

            vote_date = date_regex.search(motion)
            if vote_date:
                vote_date = datetime.datetime.strptime(vote_date.group(),
                                                       '%m/%d/%Y')

            # A chunk may consist of the motion alone, so only look at the
            # second field when it exists.
            passed = ('Passed' in motion) or (
                len(raw_vote) > 1 and 'Adopted' in raw_vote[1])

            yes_count = None
            no_count = None
            other_count = 0
            ayes = []
            nos = []

            for field in raw_vote[1:]:
                trailing_number = count_regex.search(field)
                if field.startswith('Ayes...') and trailing_number:
                    yes_count = int(trailing_number.group())
                    continue
                if field.startswith('Noes...') and trailing_number:
                    no_count = int(trailing_number.group())
                    continue

                aye_match = aye_regex.search(field)
                if aye_match:
                    ayes = aye_match.groups()[0].split(', ')
                    continue

                no_match = no_regex.search(field)
                if no_match:
                    nos = no_match.groups()[0].split(', ')

            # Compare against None, not truthiness: a parsed total of 0
            # (e.g. a unanimous vote) is a real value and must not wipe
            # out the other total.
            if yes_count is not None and no_count is not None:
                passed = yes_count > no_count
            else:
                yes_count = no_count = 0

            vote = Vote(bill['chamber'], vote_date, motion, passed,
                        yes_count, no_count, other_count)
            vote.add_source(link)
            for voter in ayes:
                vote.yes(voter)
            for voter in nos:
                vote.no(voter)
            bill.add_vote(vote)

    return bill
def scrape_lower_vote(self, url):
    """Parse a lower-chamber roll-call page and return the resulting Vote.

    The motion is assembled from the elements mentioning "Amendment Number",
    "Reading Number" and "Floor Actions". Totals come from the "Yeas",
    "Nays" and "Not Voting" elements ("<label> - <n>"). A vote passes only
    when the yeas outnumber nays and non-voters combined. Each row of the
    roll-call table starts with a one-character code: Y, N, or "-"/"C" for
    other; rows with any other code are ignored.
    """
    with self.urlopen(url) as page:
        page = lxml.html.fromstring(page)

        table = page.xpath("/html/body/table/tr[3]/td/table/tr/"
                           "td[3]/table/tr/td/table[3]")[0]

        motion_parts = [
            page.xpath("string(//*[contains(text(), '%s')])" % part).strip()
            for part in ("Amendment Number", "Reading Number",
                         "Floor Actions")]
        motion = " ".join(motion_parts).strip()

        date = page.xpath(
            'string(//*[contains(text(), "Date:")]/following-sibling::*)')
        date = datetime.datetime.strptime(date, "%m/%d/%Y")

        def total_for(label):
            """Return the integer after ' - ' in the element naming label."""
            text = page.xpath('string(//*[contains(text(), "%s")])' % label)
            return int(text.split(' - ')[1])

        yeas = total_for("Yeas")
        nays = total_for("Nays")
        nv = total_for("Not Voting")

        passed = yeas > (nays + nv)

        vote = Vote('lower', date, motion, passed, yeas, nays, nv)
        vote.add_source(url)

        for tr in table.xpath("tr/td/table/tr"):
            row_text = re.sub(r"\s+", r" ", tr.xpath("string()"))

            # Everything after the first whitespace-separated token is the
            # member's name.
            name = " ".join(row_text.split()[1:])
            code = row_text[0]

            if code == "Y":
                vote.yes(name)
            elif code == "N":
                vote.no(name)
            elif code in ("-", "C"):
                vote.other(name)

        return vote
def scrape_new_vote(self, url):
    """Parse a new-layout vote page and return the resulting Vote.

    The ``ctl00_contentMain_hdVote`` element is split on ", ": the second
    piece names the chamber (anything not starting with 'House' is treated
    as upper) followed by an optional location, and the remaining pieces
    form the motion. Counts are read from the ``tdAyes``, ``tdNays``,
    ``tdExcused`` and ``tdAbsent`` elements; member votes are read from the
    ``tblVotes`` table.
    """
    vote_page = self.soup_parser(self.urlopen(url))

    header = vote_page.find(id="ctl00_contentMain_hdVote").contents[0]
    header_parts = header.split(', ')
    chamber_name = header_parts[1]

    chamber = 'lower' if chamber_name.startswith('House') else 'upper'

    location = ' '.join(chamber_name.split(' ')[1:])
    if location.startswith('of Representatives'):
        location = ''

    motion = ', '.join(header_parts[2:])

    def count_in(element_id):
        """Return the integer content of the element with this id."""
        return int(vote_page.find(id=element_id).contents[0])

    yes_count = count_in("ctl00_contentMain_tdAyes")
    no_count = count_in("ctl00_contentMain_tdNays")
    excused_count = count_in("ctl00_contentMain_tdExcused")
    absent_count = count_in("ctl00_contentMain_tdAbsent")
    other_count = excused_count + absent_count

    passed = yes_count > no_count

    vote = Vote(chamber, None, motion, passed,
                yes_count, no_count,
                other_count, excused_count=excused_count,
                absent_count=absent_count,
                location=location)
    vote.add_source(url)

    vote_tbl = vote_page.find(id="ctl00_contentMain_tblVotes")
    for td in vote_tbl.findAll('td'):
        cast = td.contents[0]
        # The member's name is the content of the element just before.
        if cast == 'Yea':
            vote.yes(td.findPrevious().contents[0])
        elif cast == 'Nay':
            vote.no(td.findPrevious().contents[0])
        elif cast in ('Excused', 'Absent'):
            vote.other(td.findPrevious().contents[0])

    return vote
def parse_vote(self, bill, actor, date, motion, url):
    """Parse a plain-text vote report at *url* and add the vote to *bill*.

    The page must contain "YEAS <n> ... NAYS <n> ... ABSENT [OR NOT VOTING]
    <n> ..."; the text after each total holds that group's names, separated
    by two or more whitespace characters. *actor* is used as the chamber
    when it is 'upper' or 'lower'; otherwise it is stored as the vote's
    location and the chamber is left empty.
    """
    with self.urlopen(url) as page:
        vote_re = re.compile('YEAS -?\s?(\d+)(.*)NAYS -?\s?(\d+)'
                             '(.*)ABSENT( OR NOT VOTING)? -?\s?'
                             '(\d+)(.*)',
                             re.MULTILINE | re.DOTALL)
        match = vote_re.search(page)

        yes_count = int(match.group(1))
        no_count = int(match.group(3))
        other_count = int(match.group(6))

        passed = yes_count > no_count

        if actor in ('upper', 'lower'):
            vote_chamber, vote_location = actor, ''
        else:
            vote_chamber, vote_location = '', actor

        vote = Vote(vote_chamber, date,
                    motion, passed, yes_count, no_count,
                    other_count,
                    location=vote_location)
        vote.add_source(url)

        # (regex group holding the names, method that records them)
        name_groups = ((2, vote.yes), (4, vote.no), (7, vote.other))
        for group_index, record in name_groups:
            names = re.split('\s{2,}', match.group(group_index).strip())
            for name in names:
                if name:
                    record(name)

        bill.add_vote(vote)
def scrape_vote(self, bill, chamber, url):
    """Parse a table-layout roll-call page and add the vote to *bill*.

    The second row of the page's first table supplies, in cell order: the
    date (mm/dd/yyyy), the motion, the yes, no and other counts, and the
    result text ('Pass' means the vote passed). From the fourth row on,
    every row with exactly two cells holds a member name and 'Yea', 'Nay' or
    something else (recorded as other).

    The motion text is classified as passage, third reading, amendment or
    other, and that classification is stored on the vote as ``type``.
    """
    with self.urlopen(url) as page:
        # NOTE(review): as written both arguments look like a plain space,
        # which makes this call a no-op; possibly a non-breaking-space
        # replacement was intended -- confirm against the upstream file.
        page = page.replace(' ', ' ')
        page = lxml.html.fromstring(page)

        info_row = page.xpath("//table[1]/tr[2]")[0]

        date = info_row.xpath("string(td[1])")
        date = datetime.datetime.strptime(date, "%m/%d/%Y")

        motion = info_row.xpath("string(td[2])")
        yes_count = int(info_row.xpath("string(td[3])"))
        no_count = int(info_row.xpath("string(td[4])"))
        other_count = int(info_row.xpath("string(td[5])"))
        passed = info_row.xpath("string(td[6])") == 'Pass'

        if motion == 'Shall the bill pass?':
            vote_type = 'passage'
        elif motion == 'Shall the bill be read the third time?':
            vote_type = 'reading:3'
        elif 'be amended as' in motion:
            vote_type = 'amendment'
        else:
            vote_type = 'other'

        vote = Vote(chamber, date, motion, passed,
                    yes_count, no_count, other_count)
        # Store the classification; it is kept in its own variable so the
        # per-member loop below cannot overwrite it.
        vote['type'] = vote_type
        vote.add_source(url)

        for tr in page.xpath("//table[1]/tr")[3:]:
            if len(tr.xpath("td")) != 2:
                continue

            # Keep only the text before " of" in the name cell.
            name = tr.xpath("string(td[1])").split(' of')[0]
            cast = tr.xpath("string(td[2])").strip()

            if cast == 'Yea':
                vote.yes(name)
            elif cast == 'Nay':
                vote.no(name)
            else:
                vote.other(name)

        bill.add_vote(vote)
def scrape_votes(self, bill_page, bill, insert, year):
    """Follow every "Passage" link on a bill page and add each vote to *bill*.

    A link whose text contains 'Assembly' is a lower-chamber vote; any other
    is treated as upper. The vote report's centered header ends with a
    month-day string, which is combined with *year* to form the date. The
    five summary cells give yes, no, excused, not-voting and absent totals;
    the last three are summed into the other count. Each row of the second
    body table holds a member name and 'Yea', 'Nay' or something else
    (recorded as other).
    """
    root = lxml.html.fromstring(bill_page)
    for link in root.xpath('//a[contains(text(), "Passage")]'):
        motion = link.text
        if 'Assembly' in motion:
            chamber = 'lower'
        else:
            chamber = 'upper'

        vote_url = 'http://www.leg.state.nv.us/Session/%s/Reports/%s' % (
            insert, link.get('href'))
        bill.add_source(vote_url)

        with self.urlopen(vote_url) as page:
            page = page.decode("utf8").replace(u"\xa0", " ")
            # Separate name so the bill page's tree is not shadowed.
            vote_root = lxml.html.fromstring(page)

            def summary_count(column):
                """Return the leading integer of the given summary cell."""
                cell = vote_root.xpath(
                    'string(/html/body/center/table/tr/td[%d])' % column)
                return int(cell.split()[0])

            date = vote_root.xpath('string(/html/body/center/font)').split()[-1]
            date = date + "-" + str(year)
            date = datetime.strptime(date, "%m-%d-%Y")

            yes_count = summary_count(1)
            no_count = summary_count(2)
            excused = summary_count(3)
            not_voting = summary_count(4)
            absent = summary_count(5)
            other_count = excused + not_voting + absent
            passed = yes_count > no_count

            vote = Vote(chamber, date, motion, passed, yes_count, no_count,
                        other_count, not_voting=not_voting, absent=absent)

            for el in vote_root.xpath('/html/body/table[2]/tr'):
                # NOTE(review): str() on a name with non-ASCII characters
                # would raise under Python 2 -- confirm names are ASCII.
                name = str(el.xpath('string(td[1])').strip())
                vote_result = el.xpath('string(td[2])').split()[0]

                if vote_result == 'Yea':
                    vote.yes(name)
                elif vote_result == 'Nay':
                    vote.no(name)
                else:
                    vote.other(name)

            bill.add_vote(vote)
def scrape_votes(self, bill_page, bill, chamber, insert, motion, year):
    """Follow the vote links on a bill page and add each vote to *bill*.

    Vote links are the anchors in the fifth table of the ``content`` div.
    The vote report's centered header ends with a month-day string, which is
    combined with *year* to form the date. The five summary cells give yes,
    no, excused, not-voting and absent totals; the last three are summed
    into the other count. Each row of the second body table holds a member
    name and 'Yea', 'Nay' or something else (recorded as other).
    """
    root = lxml.etree.fromstring(bill_page, lxml.etree.HTMLParser())
    url_path = ('/html/body/div[@id="content"]/table[5]/tr/td/a')
    for mr in root.xpath(url_path):
        url_end = mr.xpath('string(@href)')
        vote_url = 'http://www.leg.state.nv.us/Session/%s/Reports/%s' % (
            insert, url_end)
        bill.add_source(vote_url)

        with self.urlopen(vote_url) as page:
            page = page.decode("utf8").replace(u"\xa0", " ")
            # Separate name so the bill page's tree is not shadowed.
            vote_root = lxml.etree.fromstring(page, lxml.etree.HTMLParser())

            def summary_count(column):
                """Return the leading integer of the given summary cell."""
                cell = vote_root.xpath(
                    'string(/html/body/center/table/tr/td[%d])' % column)
                return int(cell.split()[0])

            date = vote_root.xpath('string(/html/body/center/font)').split()[-1]
            date = date + "-" + str(year)
            date = datetime.strptime(date, "%m-%d-%Y")

            # Convert before comparing: as strings, "9" > "10".
            yes_count = summary_count(1)
            no_count = summary_count(2)
            excused = summary_count(3)
            not_voting = summary_count(4)
            absent = summary_count(5)

            # Members recorded via vote.other() below belong to one of
            # these three groups, so the total reflects them.
            other_count = excused + not_voting + absent
            passed = yes_count > no_count

            vote = Vote(chamber, date, motion, passed, yes_count, no_count,
                        other_count, not_voting=not_voting, absent=absent)

            for el in vote_root.xpath('/html/body/table[2]/tr'):
                # NOTE(review): str() on a name with non-ASCII characters
                # would raise under Python 2 -- confirm names are ASCII.
                name = str(el.xpath('string(td[1])').strip())
                vote_result = el.xpath('string(td[2])').split()[0]

                if vote_result == 'Yea':
                    vote.yes(name)
                elif vote_result == 'Nay':
                    vote.no(name)
                else:
                    vote.other(name)

            bill.add_vote(vote)
def scrape_votes(self, vote_text, vote_url, house, date, bill):
    """Parse a one-sentence vote summary and add the vote to *bill*.

    *vote_text* is split on ";" into up to four parts, read in this order:
    Ayes, Ayes with reservations, Noes, Excused. The first number in each
    part is that group's count, and the text after the first "(s)" is its
    comma-separated name list. Ayes with reservations and Excused are both
    recorded as "other". The motion is the text before
    " The votes were as follows:". *house* 'H' means the lower chamber;
    anything else is treated as upper.

    NOTE(review): the vote is always recorded as passed; the summary text
    handled here carries no explicit result.
    """
    group_count = 4  # Ayes, Ayes with reservations, Noes, Excused
    votes_parts = vote_text.split(";")
    motion_text = vote_text.partition(" The votes were as follows:")[0]

    voters = []
    for vp in votes_parts:
        voters_list = vp.partition("(s)")[2].split(", ")
        voters_list[0] = voters_list[0].lstrip(" ")
        voters_list[-1] = voters_list[-1].rstrip(". ")
        # A part without a name list yields a single empty string; drop it
        # so no blank voter is recorded.
        voters.append([name for name in voters_list if name])

    # Guarantee one (possibly empty) name list per expected group.
    while len(voters) < group_count:
        voters.append([])

    vote_counts = [0] * group_count
    for i, part in enumerate(votes_parts[:group_count]):
        match = re.search("[0-9]+", part)
        if match is not None:
            vote_counts[i] = int(match.group(0))

    if house == 'H':
        vote_house = "lower"
    else:
        vote_house = "upper"

    vote = Vote(vote_house, date, motion_text, True,
                vote_counts[0], vote_counts[2],
                vote_counts[1] + vote_counts[3])
    vote.add_source(vote_url)

    for yes_voter in voters[0]:
        vote.yes(yes_voter)
    for no_voter in voters[2]:
        vote.no(no_voter)
    # "Other" is reservations (index 1) plus excused (index 3), matching
    # how the other count is computed above.
    for other_voter in voters[1]:
        vote.other(other_voter)
    for other_voter in voters[3]:
        vote.other(other_voter)

    bill.add_vote(vote)
def scrape_votes(self, bill_url, bill, chamber, insert, motion):
    """Fetch a bill page, follow its vote links and add each vote to *bill*.

    Vote links are the anchors in the fifth table of the ``content`` div.
    The five summary cells of a vote report give yes, no, excused,
    not-voting and absent totals; the last three are summed into the other
    count. Each row of the second body table holds a member name and 'Yea',
    'Nay' or something else (recorded as other).

    The date is passed to Vote as the last whitespace-separated token of the
    report header, unparsed, since no year is available here.
    """
    with self.urlopen(bill_url) as page:
        root = lxml.etree.fromstring(page, lxml.etree.HTMLParser())
        url_path = ('/html/body/div[@id="content"]/table[5]/tr/td/a')
        for mr in root.xpath(url_path):
            url_end = mr.xpath('string(@href)')
            vote_url = 'http://www.leg.state.nv.us/Session/%s/Reports/%s' % (
                insert, url_end)

            with self.urlopen(vote_url) as vote_page:
                # Separate name so the bill page's tree is not shadowed.
                vote_root = lxml.etree.fromstring(vote_page,
                                                  lxml.etree.HTMLParser())

                def summary_count(column):
                    """Return the leading integer of the given summary cell."""
                    cell = vote_root.xpath(
                        'string(/html/body/center/table/tr/td[%d])' % column)
                    return int(cell.split()[0])

                date = vote_root.xpath(
                    'string(/html/body/center/font)').split()[-1]

                # Convert before comparing: as strings, "9" > "10".
                yes_count = summary_count(1)
                no_count = summary_count(2)
                excused = summary_count(3)
                not_voting = summary_count(4)
                absent = summary_count(5)
                other_count = excused + not_voting + absent
                passed = yes_count > no_count

                vote = Vote(chamber, date, motion, passed, yes_count,
                            no_count, other_count,
                            not_voting=not_voting, absent=absent)

                for el in vote_root.xpath('/html/body/table[2]/tr'):
                    # NOTE(review): str() on a name with non-ASCII
                    # characters would raise under Python 2 -- confirm
                    # names are ASCII.
                    name = str(el.xpath('string(td[1])').strip())
                    vote_result = el.xpath('string(td[2])').split()[0]

                    if vote_result == 'Yea':
                        vote.yes(name)
                    elif vote_result == 'Nay':
                        vote.no(name)
                    else:
                        vote.other(name)

                bill.add_vote(vote)
def scrape_vote(self, chamber, session, bill_id, vote_url):
    """Scrape one lower-chamber vote page and save it with ``save_vote``.

    Nothing is saved when the request ended up on the site's "no vote found"
    page. The first paragraph after the ``<h1>`` holds the motion and the
    "__ YEA and __ Nay" totals; the second holds the date. The first table
    lists yea voters and the second table lists nay voters.
    """
    NO_VOTE_URL = 'http://www.house.leg.state.mn.us/votes/novotefound.asp'

    with self.urlopen(vote_url) as html:
        # sometimes the link is broken, will redirect to NO_VOTE_URL
        if html.response.url == NO_VOTE_URL:
            return

        doc = lxml.html.fromstring(html)
        paragraphs = doc.xpath('//h1/following-sibling::p')

        # first paragraph has motion and vote total
        lines = paragraphs[0].text_content().splitlines()

        # 3rd line is the motion except in cases where first line is gone
        motion = lines[2] or lines[1]

        # last line is "__ YEA and __ Nay"
        yeas, nays = [int(count)
                      for count in self.yeanay_re.match(lines[-1]).groups()]

        # second paragraph has date
        date_text = self.date_re.match(
            paragraphs[1].text_content()).groups()[0]
        date = datetime.datetime.strptime(date_text, '%m/%d/%Y')

        filename = 'vote%s-%s' % (self.sequence.next(), bill_id)

        vote = Vote('lower', date, motion, yeas > nays, yeas, nays, 0,
                    session=session, bill_id=bill_id, bill_chamber=chamber,
                    filename=filename)
        vote.add_source(vote_url)

        # first table has YEAs, second table is nays
        name_sources = (('//table[1]/tr/td/font/text()', vote.yes),
                        ('//table[2]/tr/td/font/text()', vote.no))
        for path, record in name_sources:
            for name in doc.xpath(path):
                record(name.strip())

        self.save_vote(vote)
def scrape_upper_vote(self, url):
    """Parse an upper-chamber roll call held in a ``<pre>`` block.

    The motion is the "Amendment Number", "Reading Number" and
    "Floor Actions" fields joined with spaces. Totals come from the
    "Yeas - n", "Nays - n" and "Not Voting - n" fields; a vote passes only
    when the yeas outnumber nays and non-voters combined. Members are
    matched by the codes "Y ", "N " and "EX" (recorded as other).

    Returns the Vote.
    """
    with self.urlopen(url) as page:
        page = lxml.html.fromstring(page)
        text = page.xpath('string(//pre)')

        def field(pattern, group):
            """Return the requested group of the first match in the text."""
            return re.search(pattern, text, re.MULTILINE).group(group)

        motion = " ".join(
            field(pattern, 0).strip()
            for pattern in (r'Amendment Number:\s([^\s]+)?',
                            r'Reading Number .:\s([^\s]+)?',
                            r'Floor Actions ..:\s([^\s]+)?'))

        yeas = int(field(r'Yeas\s-\s(\d+)', 1))
        nays = int(field(r'Nays\s-\s(\d+)', 1))
        nv = int(field(r'Not\sVoting\s-\s(\d+)', 1))

        date = datetime.datetime.strptime(
            field(r'Date:\s(\d+/\d+/\d+)', 1), '%m/%d/%Y')

        passed = yeas > (nays + nv)

        vote = Vote('upper', date, motion, passed, yeas, nays, nv)
        vote.add_source(url)

        pattern = r'_\s%s\s(\w+)'
        # (vote code as printed in the roll call, method that records it)
        for code, record in (('Y ', vote.yes),
                             ('N ', vote.no),
                             ('EX', vote.other)):
            for match in re.finditer(pattern % code, text, re.MULTILINE):
                record(match.group(1))

        return vote
def parse_roll_call(self, url, chamber, date):
    """Parse a roll-call page and return the resulting Vote.

    The motion is the text of the fourth ``font8text`` div ('FP' is expanded
    to 'FINAL PASSAGE'). Totals are read from the elements following the
    YEAS, NAYS, LVE and N/V label divs; LVE and N/V are summed into the
    other count. A vote passes only when the yeas outnumber everything else.
    Each Y/N/X/E span is followed by the member's name; X and E are recorded
    as other.
    """
    with self.urlopen(url) as page:
        page = lxml.html.fromstring(page)

        motion = page.xpath("//div[@class='font8text']")[3].text.strip()
        if motion == 'FP':
            motion = 'FINAL PASSAGE'

        if motion == 'FINAL PASSAGE':
            vote_type = 'passage'
        elif re.match(r'CONCUR(RENCE)? IN \w+ AMENDMENTS', motion):
            vote_type = 'amendment'
        else:
            vote_type = 'other'

        def total_after(label):
            """Return the integer in the element following the label div."""
            label_div = page.xpath("//div[text() = '%s']" % label)[0]
            return int(label_div.getnext().text)

        yeas = total_after('YEAS')
        nays = total_after('NAYS')
        lve = total_after('LVE')
        nv = total_after('N/V')
        other = lve + nv

        passed = yeas > (nays + other)

        vote = Vote(chamber, date, motion, passed, yeas, nays, other,
                    type=vote_type)

        for span in page.xpath("//span[text() = 'Y' or text() = 'N'"
                               "or text() = 'X' or text() = 'E']"):
            name = span.getnext().text.strip()
            code = span.text

            if code == 'Y':
                vote.yes(name)
            elif code == 'N':
                vote.no(name)
            else:
                vote.other(name)

        return vote
def get_html_vote_results(self, bill, motion_name, vote_data):
    """Build a Vote from an HTML vote report.

    Raises NoVoteDataException when the report says no vote records exist.
    The vote is marked passed when any line of the raw text is exactly
    'Do Pass' or 'Do Concur'. Each table is then classified by its first
    header cell: 'YEAS' marks the totals table (last row: yes, no, and two
    further counts summed into other); an empty header with four header
    cells marks the member table ("<code> <name>" per cell); an empty header
    whose second row starts with "DATE:" supplies the vote date.

    Returns the Vote; it is not added to *bill* here.
    """
    vote = Vote(bill['chamber'], None, motion_name, False, 0, 0, 0)

    if "No Vote Records Found for this Action." in vote_data:
        raise NoVoteDataException()

    passage_indicators = ['Do Pass', 'Do Concur']
    if any(line in passage_indicators for line in vote_data.splitlines()):
        vote['passed'] = True

    vote_data = ElementTree(lxml.html.fromstring(vote_data))

    for table in vote_data.findall("//table"):
        rows = table.findall("tr")
        header_cells = rows[0].findall("th")
        left_header = header_cells[0].text.strip()

        if left_header == 'YEAS':
            count_cells = rows[-1].findall("td")
            vote['yes_count'] = int(count_cells[0].text)
            vote['no_count'] = int(count_cells[1].text)
            other_count = int(count_cells[2].text)
            vote['other_count'] = int(count_cells[3].text) + other_count
            continue

        if left_header != '':
            continue

        if len(header_cells) == 4:
            for data in ElementTree(table).findall("//td"):
                vote_value, name = data.text.replace(u"\xa0", " ").split(" ", 1)
                vote_value = vote_value.strip()
                name = name.strip()
                if name == "":
                    continue
                if vote_value == 'Y':
                    vote.yes(name)
                elif vote_value == 'N':
                    vote.no(name)
                else:
                    vote.other(name)
        else:
            date_text = rows[1].findall("td")[0].text
            if date_text.find("DATE:") == 0:
                vote['date'] = datetime.strptime(
                    date_text.replace("DATE:", "").strip(), "%B %d, %Y")

    return vote
def scrape_vote(self, bill, vote_type_id, vote_type):
    """Scrape one council vote page and add the vote to *bill*.

    The page is addressed by *vote_type_id* and the bill's id; *vote_type*
    is used as the motion. The "voice vote" and "approved" boxes count as
    checked when their bold text is 'x'. The other count is whatever remains
    of 13 after the yes and no counts. Members are the ``<u>`` elements; the
    italic text two levels up decides yes ('YES'), no ('NO') or other.
    """
    base_url = 'http://www.dccouncil.washington.dc.us/lims/voting.aspx?VoteTypeID=%s&LegID=%s'
    url = base_url % (vote_type_id, bill['bill_id'])

    with self.urlopen(url) as html:
        doc = lxml.html.fromstring(html)

        def bold_text(span_id):
            """Return the first bold text node inside the given span."""
            return doc.xpath('//span[@id="%s"]/b/text()' % span_id)[0]

        vote_date = convert_date(doc.get_element_by_id('VoteDate').text)

        # check if voice vote / approved boxes have an 'x'
        voice = bold_text('VoteTypeVoice') == 'x'
        passed = bold_text('VoteResultApproved') == 'x'

        yes_count = extract_int(bold_text('VoteCount1'))
        no_count = extract_int(bold_text('VoteCount2'))
        # a bit lazy
        other_count = 13 - (yes_count + no_count)

        vote = Vote('upper', vote_date, vote_type, passed, yes_count,
                    no_count, other_count, voice_vote=voice)
        vote.add_source(url)

        # members are only text on page in a <u> tag
        for member_u in doc.xpath('//u'):
            member = member_u.text
            cast = member_u.xpath('../../i/text()')[0]
            if 'YES' in cast:
                vote.yes(member)
            elif 'NO' in cast:
                vote.no(member)
            else:
                vote.other(member)

        bill.add_vote(vote)
def scrape(self, chamber, year):
    """Convert the database's bills for one chamber and year and save them.

    The session name is *year* followed by the next year. Raises
    NoDataForPeriod when that session is not listed in ``metadata``. Bills
    without any version carrying XML are skipped. For every remaining bill
    this adds the sponsors belonging to this chamber, all actions with
    non-empty text, and all votes, then calls ``self.save_bill``.
    """
    session = "%s%d" % (year, int(year) + 1)
    known_sessions = [s_ for t in metadata['terms'] for s_ in t['sessions']]
    if session not in known_sessions:
        raise NoDataForPeriod(year)

    if chamber == 'upper':
        measure_abbr, chamber_name, house_type = 'SB', 'SENATE', 'S'
    else:
        measure_abbr, chamber_name, house_type = 'AB', 'ASSEMBLY', 'A'

    bills = self.session.query(CABill).filter_by(
        session_year=session).filter_by(
        measure_type=measure_abbr)

    # (substring of the action text, action type it implies), in the order
    # the types are appended
    substring_types = (
        ('To Com', 'committee:referred'),
        ('Read third time. Passed.', 'bill:passed'),
        ('Approved by Governor', 'bill:signed'),
        ('Item veto', 'veto:line-item'),
    )

    for bill in bills:
        bill_session = session
        if bill.session_num != '0':
            bill_session += ' Special Session %s' % bill.session_num

        bill_id = bill.short_bill_id

        # "!= None" is intentional: it is part of the query expression.
        version = self.session.query(CABillVersion).filter_by(
            bill=bill).filter(CABillVersion.bill_xml != None).first()
        if not version:
            # not enough data to import
            continue

        fsbill = Bill(bill_session, chamber, bill_id,
                      version.title,
                      short_title=version.short_title)

        for author in version.authors:
            if author.house == chamber_name:
                fsbill.add_sponsor(author.contribution, author.name)

        for action in bill.actions:
            if not action.action:
                # NULL action text seems to be an error on CA's part,
                # unless it has some meaning I'm missing
                continue

            actor = (action.actor or chamber).strip()
            floor_match = re.match(r'(Assembly|Senate)($| \(Floor)', actor)
            if floor_match:
                actor = {'Assembly': 'lower',
                         'Senate': 'upper'}[floor_match.group(1)]
            elif actor.startswith('Governor'):
                actor = 'executive'
            else:
                actor = re.sub('^Assembly', 'lower', actor)
                actor = re.sub('^Senate', 'upper', actor)

            act_str = action.action
            action_types = []
            if act_str.startswith('Introduced'):
                action_types.append('bill:introduced')
            for needle, label in substring_types:
                if needle in act_str:
                    action_types.append(label)
            if not action_types:
                action_types = ['other']

            fsbill.add_action(actor, act_str, action.action_date,
                              type=action_types)

        for vote in bill.votes:
            result = vote.vote_result == '(PASS)'

            full_loc = vote.location.description
            loc_words = full_loc.split(' ')
            first_part = loc_words[0].lower()

            if first_part in ['asm', 'assembly']:
                vote_chamber = 'lower'
                vote_location = ' '.join(loc_words[1:])
            elif first_part.startswith('sen'):
                vote_chamber = 'upper'
                vote_location = ' '.join(loc_words[1:])
            else:
                vote_chamber = ''
                vote_location = full_loc

            fsvote = Vote(vote_chamber,
                          vote.vote_date_time,
                          vote.motion.motion_text or '',
                          result,
                          vote.ayes, vote.noes, vote.abstain,
                          threshold=vote.threshold,
                          location=vote_location)

            for record in vote.votes:
                code = record.vote_code
                if code == 'AYE':
                    fsvote.yes(record.legislator_name)
                elif code.startswith('NO'):
                    fsvote.no(record.legislator_name)
                else:
                    fsvote.other(record.legislator_name)

            fsbill.add_vote(fsvote)

        self.save_bill(fsbill)
def scrape_votes(self, vote_page, bill, url):
    """Parse one roll-call page and attach the resulting vote to ``bill``.

    ``vote_page`` is a parsed HTML element (``text_content()`` and
    ``cssselect()`` are called on it), ``bill`` receives the vote through
    ``add_vote`` and ``url`` is recorded as the vote's source.

    The tallies are read from "Yeas: N", "Nays: N", "Absent: N" and
    "Excused: N" in the page text; a label that is not found keeps a count
    of 0.  Absent and excused are summed into the "other" count.

    Raises ``AttributeError`` when the page has no ``m/d/yyyy`` date or no
    "Senate vote"/"House vote" marker, and ``IndexError`` when it has fewer
    than three centered cells or fewer than four ``RollCall`` spans.
    """
    page_text = vote_page.text_content()

    date_text = re.search(r"[0-9]{1,2}/[0-9]{1,2}/[0-9]{4}",
                          page_text).group(0)
    vote_date = dt.datetime.strptime(date_text, '%m/%d/%Y')

    # The parsed totals have to be written back into the dict; assigning to
    # a loop variable would leave every count at 0 and "passed" always False.
    votes = {"Yeas": 0, "Nays": 0, "Absent": 0, "Excused": 0}
    for label in list(votes):
        count_match = re.search(label + r":\s*([0-9]+)", page_text)
        if count_match:
            votes[label] = int(count_match.group(1))

    passed = votes["Yeas"] > votes["Nays"]

    chamber_word = re.search(r"(Senate|House) vote", page_text).group(1)
    if chamber_word == "Senate":
        chamber = "upper"
        title = "Senator"
    else:
        chamber = "lower"
        title = "Representative"

    motion = vote_page.cssselect('td[align="center"]')[2].text_content()

    vote = Vote(chamber, vote_date, motion, passed,
                votes["Yeas"], votes["Nays"],
                votes["Absent"] + votes["Excused"])
    vote.add_source(url)

    roll_calls = []
    for span in vote_page.cssselect('span[class="RollCall"]'):
        voters = span.text_content().split(", ")
        if len(voters) == 1:
            # No commas: either a single name or "A and B".
            voters = voters[0].split(" and ")

        # Drop everything up to and including the title, plus the plural
        # "s".  str.lstrip() takes a *set* of characters, so the prefixes
        # are removed explicitly to avoid eating the start of a name.
        remainder = voters[0].partition(title)[2]
        if remainder.startswith("s"):
            remainder = remainder[1:]
        voters[0] = remainder.lstrip(" ")

        last = voters[-1].lstrip(" ")
        if last.startswith("and "):
            last = last[len("and "):].lstrip(" ")
        voters[-1] = last

        # An empty span yields an empty string, which is not a voter.
        roll_calls.append([name for name in voters if name])

    # The spans are consumed positionally: yes, no, then two "other" groups.
    for name in roll_calls[0]:
        vote.yes(name)
    for name in roll_calls[1]:
        vote.no(name)
    for name in roll_calls[2]:
        vote.other(name)
    for name in roll_calls[3]:
        vote.other(name)

    bill.add_vote(vote)
def scrape_votes(self, url, chamb):
    """Fetch a roll-call page and return the ``Vote`` parsed from it.

    ``url`` is opened with ``self.urlopen`` and parsed with BeautifulSoup;
    ``chamb`` is passed through as the vote's chamber.

    Two page layouts are handled:

    * more than four ``<font>``/``<span>`` elements in total: header lines
      are read from those elements and the individual votes from the cells
      of the last ``<table>``;
    * otherwise: everything is read from the last child of the first
      ``<pre>`` element, split into lines.

    In both layouts "Y" is recorded as yes, "N" as no and "X"/"A" as other.
    """
    with self.urlopen(url) as doc:
        soup = BeautifulSoup(doc)
        date = None
        motion = None
        yeas = None
        neas = None
        others = None
        passed = None
        chamber = chamb
        necessary = None
        vote = None
        digits = re.compile(r"^[\d ]+$")

        def record(vote_value, name):
            # Route one legislator to the right list on the current vote.
            if vote_value == "Y":
                vote.yes(name)
            elif vote_value == "N":
                vote.no(name)
            else:
                vote.other(name)

        fonts = soup.findAll("font")
        spans = soup.findAll("span")
        if len(fonts) + len(spans) > 4:
            # data is vaguely structured
            if len(fonts) < 4:
                fonts = spans
            for element in fonts:
                line = str(element.contents[0]).strip()
                if "Taken on" in line:
                    # text is in the form of: "Taken on <date> <reason>"
                    parts = line.split(None, 3)
                    date = parts[2]
                    if len(parts) > 3:
                        motion = parts[3]
                elif "Those voting Yea" in line:
                    yeas = self.get_num_from_line(line)
                elif "Those voting Nay" in line:
                    neas = self.get_num_from_line(line)
                elif "Those absent and not voting" in line:
                    others = self.get_num_from_line(line)
                elif ("Necessary for Adoption" in line
                        or "Necessary for Passage" in line):
                    necessary = self.get_num_from_line(line)
                    passed = yeas >= necessary
                    vote = Vote(chamber, date, motion, passed,
                                yeas, neas, others)

            # figure out who voted for what, using the last table
            tables = soup.findAll("table")
            cells = tables[-1].findAll("td")
            vote_value = None
            for cell in cells:
                # either we are looking at fonts or spans
                holder = cell.find("font")
                if holder is None:
                    holder = cell.find("span")
                if holder is None:
                    cell_text = ""
                else:
                    cell_text = holder.contents[0].strip()

                if not cell_text or digits.search(cell_text):
                    continue
                if vote_value is None:
                    # Expecting a vote marker; anything else is skipped.
                    if cell_text in ("Y", "N"):
                        vote_value = cell_text
                    elif cell_text in ("X", "A"):
                        vote_value = "X"
                else:
                    # The cell after a marker holds the legislator name.
                    record(vote_value, cell_text)
                    vote_value = None
        else:
            # data is mostly unstructured. Have to sift through a string
            data = soup.find("pre")
            raw_text = data.contents[-1].strip()
            lines = re.split(r"\n+|\r+|\f+", raw_text)

            roll_start = 0
            for index, raw_line in enumerate(lines):
                roll_start = index + 1
                line = raw_line.strip()
                if "Taken on" in line:
                    # text is in the form of: "Taken on <date> <reason>"
                    parts = line.split(None, 3)
                    date = parts[2]
                    if len(parts) > 3:
                        motion = parts[3]
                elif "Those voting Yea" in line:
                    yeas = self.get_num_from_line(line)
                elif "Those voting Nay" in line:
                    neas = self.get_num_from_line(line)
                elif "Those absent and not voting" in line:
                    others = self.get_num_from_line(line)
                elif ("Necessary for Adoption" in line
                        or "Necessary for Passage" in line):
                    if "Adoption" in line:
                        motion = "Adoption"
                    else:
                        motion = "Passage"
                    necessary = self.get_num_from_line(line)
                elif "The following is the roll call vote:" in line:
                    break  # the next lines contain actual votes

            # process the vote values
            passed = yeas >= necessary
            vote = Vote(chamber, date, motion, passed, yeas, neas, others)

            # str.join is used rather than the ``string`` module: a local
            # variable of that name elsewhere in this function would make
            # ``string.join`` raise UnboundLocalError here.
            # NOTE(review): annoying_vote below can only match a token that
            # contains a space, which never happens when splitting on a
            # single space -- the separator may originally have been wider;
            # confirm against the real page format.
            words = " ".join(lines[roll_start:]).split(" ")

            absent_vote_value = re.compile(r"^(X|A)$")
            yea_vote_value = re.compile(r"^Y$")
            nea_vote_value = re.compile(r"^N$")
            # marker and name glued together because the separator between
            # them was too short to split on
            annoying_vote = re.compile(r"^(Y|X|A|N) ([\S ]+)$")

            vote_value = None
            for word in words:
                word = word.strip()
                if not word or digits.search(word):
                    continue
                word = strip_digits(word)
                if vote_value is not None:
                    record(vote_value, word)
                    vote_value = None
                elif absent_vote_value.match(word):
                    vote_value = "X"
                elif yea_vote_value.match(word):
                    vote_value = "Y"
                elif nea_vote_value.match(word):
                    vote_value = "N"
                else:
                    combined = annoying_vote.match(word)
                    if combined:
                        # group 1 is the marker, group 2 the name
                        record(combined.group(1), combined.group(2))
        return vote
def scrape_votes(self, chamber, url, bill, date, **kwargs):
    """Scrape a vote detail page (with legislator names) onto ``bill``.

    Required keyword arguments:

    * ``type``   -- passed to ``Vote`` as its ``type``
    * ``motion`` -- the motion text

    Optional keyword arguments:

    * ``passed``    -- ``'PASSED'`` or ``'FAILED'``; when absent the result
      is derived from the scraped counts
    * ``AMEND``, ``EMER``, ``2/3 VOTE`` -- elements whose stripped text is
      forwarded to ``Vote`` (``AMEND`` under the key ``amended``; the other
      two only when non-blank)
    * ``committee`` -- resolved with ``utils.get_committee_name``

    Raises ``KeyError`` when ``type`` or ``motion`` is missing or ``passed``
    has an unexpected value, and ``IndexError`` when the vote table or the
    count text cannot be found on the page.
    """
    o_args = {}
    v_type = kwargs.pop('type')
    # Fail before any network access instead of with a NameError afterwards.
    motion = kwargs.pop('motion')

    passed = None  # None means: work it out from the vote counts later
    if 'passed' in kwargs:
        passed = {'PASSED': True, 'FAILED': False}[kwargs.pop('passed')]
    if 'AMEND' in kwargs:
        o_args['amended'] = kwargs.pop('AMEND').text_content().strip()
    if 'EMER' in kwargs and kwargs['EMER'].text_content().strip():
        o_args['EMER'] = kwargs.pop('EMER').text_content().strip()
    if '2/3 VOTE' in kwargs and kwargs['2/3 VOTE'].text_content().strip():
        o_args['2/3 VOTE'] = kwargs.pop('2/3 VOTE').text_content().strip()
    if 'committee' in kwargs:
        o_args['committee'] = utils.get_committee_name(
            kwargs.pop('committee'), chamber)

    with self.urlopen(url) as vote_page:
        root = html.fromstring(vote_page)
        vote_table = root.xpath('/html/body/div/table/tr[3]/td[4]/table/tr/td/table/tr/td/table')[0]
        vote_count = vote_table.xpath('following-sibling::p/following-sibling::text()')
        vote_string = vote_count[0].replace(u'\xa0', '').strip()

        # [A-Za-z], not [A-z]: the latter also spans the punctuation that
        # sits between "Z" and "a".  \d+ so a label without a number is not
        # matched and never reaches int('').
        v_count = re.compile(r'\b[A-Z]*\s*[A-Za-z]*:\s\d+')

        yes_count = 0
        no_count = 0
        o_count = 0
        for entry in v_count.findall(vote_string):
            k, v = entry.split(':')
            # make NOT VOTING not_voting
            k = k.strip().replace(' ', '_').lower()
            v = int(v.strip())
            if not k:
                # a bare ": N" carries no label to file the count under
                continue
            if k == 'ayes':
                yes_count = v
            elif k == 'nays':
                no_count = v
            else:
                # every other tally is forwarded to Vote by name and also
                # summed into the "other" count
                o_args[str(k)] = v
                o_count += v

        if passed is None:
            passed = yes_count > no_count
            if 'committee' not in o_args:
                needs_supermajority = 'EMER' in o_args or '2/3 VOTE' in o_args
                # NOTE(review): the thresholds below are reproduced as
                # found; confirm them (and the strict ">") against the
                # chambers' actual voting rules.
                if chamber == 'upper' and passed:
                    if needs_supermajority:
                        passed = yes_count > 20
                    else:
                        passed = yes_count > 16
                elif chamber == 'lower' and passed:
                    if needs_supermajority:
                        passed = yes_count > 40
                    else:
                        passed = yes_count > 31

        vote = Vote(chamber, date, motion, passed, yes_count, no_count,
                    o_count, type=v_type, **o_args)
        vote.add_source(url)

        # grab all the tables descendant tds and pair 'em up as
        # (name cell, vote cell); an unpaired trailing cell is ignored
        tds = vote_table.xpath('descendant::td')
        for name_cell, vote_cell in zip(tds[0::2], tds[1::2]):
            v = vote_cell.text_content().strip()
            name = name_cell.text_content().strip()
            if name == 'Member Name':
                continue  # header row
            if v == 'Y':
                vote.yes(name)
            elif v == 'N':
                vote.no(name)
            else:
                vote.other(name)

        bill.add_vote(vote)
def scrape_bill_type(self, chamber, session, bill_type, type_abbr):
    """Import all bills of one measure type for ``chamber`` and ``session``.

    ``type_abbr`` selects the ``CABill`` rows by ``measure_type``;
    ``bill_type`` becomes the first entry of each bill's type list.  Every
    version that has XML is added to the bill, and the title, short title,
    type list and subject of the last such version become the bill's own.
    Bills without a title are skipped with a warning.  Sponsors, actions
    and votes are then attached and the bill is passed to
    ``self.save_bill``.

    Raises ``ScrapeError`` when a vote location starts with neither an
    Assembly nor a Senate prefix.
    """
    chamber_name = 'SENATE' if chamber == 'upper' else 'ASSEMBLY'

    floor_actors = {'Assembly': 'lower', 'Senate': 'upper'}
    # (version attribute, label appended to the type list when it is 'Yes')
    version_flags = (
        ('appropriation', 'appropriation'),
        ('fiscal_committee', 'fiscal committee'),
        ('local_program', 'local program'),
        ('urgency', 'urgency'),
        ('taxlevy', 'tax levy'),
    )
    # (substring of the action text, action type it implies)
    keyword_types = (
        ('To Com', 'committee:referred'),
        ('Read third time. Passed.', 'bill:passed'),
        ('Approved by Governor', 'governor:signed'),
        ('Item veto', 'governor:vetoed:line-item'),
    )
    # Motion clean-up steps, applied in this order.  The case-insensitive
    # ones are compiled because re.sub() only takes flags from Python 2.7.
    # NOTE(review): the fifth pattern lacks SJR, unlike its siblings --
    # reproduced as found; confirm whether that is intentional.
    motion_cleanup = (
        (re.compile(r'(\w+)( Extraordinary)? Session$', re.IGNORECASE), ''),
        (re.compile(r'^(Senate|Assembly) ', re.IGNORECASE), ''),
        (re.compile(r'^(SCR|SJR|SB|AB|AJR|ACR)\s?\d+ \w+\.? '), ''),
        (re.compile(r' \(\w+\)$'), ''),
        (re.compile(r'(SCR|SB|AB|AJR|ACR)\s?\d+ \w+\.?$'), ''),
        (re.compile(r'(SCR|SJR|SB|AB|AJR|ACR)\s?\d+ \w+\.? '
                    r'Urgency Clause$'), '(Urgency Clause)'),
        (re.compile(r'\s+'), ' '),
    )

    bill_query = self.session.query(CABill)
    bill_query = bill_query.filter_by(session_year=session)
    bill_query = bill_query.filter_by(measure_type=type_abbr)

    for bill in bill_query:
        bill_session = session
        if bill.session_num != '0':
            bill_session += ' Special Session %s' % bill.session_num

        bill_id = bill.short_bill_id

        fsbill = Bill(bill_session, chamber, bill_id, '')

        title = ''
        short_title = ''
        bill_types = ['bill']
        subject = ''
        # NOTE(review): "!= None" is handed to .filter() as an expression
        # (looks like an ORM column comparison -- confirm); do not rewrite
        # it as "is not None".
        version_query = self.session.query(CABillVersion).filter_by(bill=bill)
        for version in version_query.filter(CABillVersion.bill_xml != None):
            title = version.title
            short_title = version.short_title
            bill_types = [bill_type]
            for attr, label in version_flags:
                if getattr(version, attr) == 'Yes':
                    bill_types.append(label)
            subject = version.subject

            fsbill.add_version(version.bill_version_id, '',
                               date=version.bill_version_action_date,
                               title=version.title,
                               short_title=version.short_title,
                               subject=[subject],
                               type=bill_types)

        if not title:
            self.warning("Couldn't find title for %s, skipping" % bill_id)
            continue

        fsbill['title'] = title
        fsbill['short_title'] = short_title
        fsbill['type'] = bill_types
        fsbill['subjects'] = [subject]

        # ``version`` is still the last version iterated above.
        for author in version.authors:
            if author.house == chamber_name:
                fsbill.add_sponsor(author.contribution, author.name)

        for action in bill.actions:
            if not action.action:
                # NULL action text seems to be an error on CA's part,
                # unless it has some meaning I'm missing
                continue

            actor = (action.actor or chamber).strip()
            floor = re.match(r'(Assembly|Senate)($| \(Floor)', actor)
            if floor:
                actor = floor_actors[floor.group(1)]
            elif actor.startswith('Governor'):
                actor = 'executive'
            else:
                actor = re.sub('^Assembly', 'lower', actor)
                actor = re.sub('^Senate', 'upper', actor)

            act_str = action.action
            action_types = []
            if act_str.startswith('Introduced'):
                action_types.append('bill:introduced')
            for keyword, label in keyword_types:
                if keyword in act_str:
                    action_types.append(label)
            if not action_types:
                action_types = ['other']

            fsbill.add_action(actor, act_str, action.action_date,
                              type=action_types)

        for vote in bill.votes:
            result = vote.vote_result == '(PASS)'

            full_loc = vote.location.description
            loc_words = full_loc.split(' ')
            first_part = loc_words[0].lower()
            if first_part in ['asm', 'assembly']:
                vote_chamber = 'lower'
            elif first_part.startswith('sen'):
                vote_chamber = 'upper'
            else:
                raise ScrapeError("Bad location: %s" % full_loc)
            vote_location = ' '.join(loc_words[1:])

            motion = vote.motion.motion_text or ''

            # The vote type is decided on the raw motion text, before any
            # clean-up.
            passage_markers = ("Third Reading", "3rd Reading", "Do Pass")
            if any(marker in motion for marker in passage_markers):
                vtype = 'passage'
            else:
                vtype = 'other'

            motion = motion.strip()
            for pattern, replacement in motion_cleanup:
                motion = pattern.sub(replacement, motion)

            if not motion:
                self.warning("Got blank motion on vote for %s" % bill_id)
                continue

            fsvote = Vote(vote_chamber,
                          vote.vote_date_time,
                          motion,
                          result,
                          int(vote.ayes),
                          int(vote.noes),
                          int(vote.abstain),
                          threshold=vote.threshold,
                          type=vtype)

            if vote_location != 'Floor':
                fsvote['committee'] = vote_location

            for record in vote.votes:
                code = record.vote_code
                if code == 'AYE':
                    fsvote.yes(record.legislator_name)
                elif code.startswith('NO'):
                    fsvote.no(record.legislator_name)
                else:
                    fsvote.other(record.legislator_name)

            fsbill.add_vote(fsvote)

        self.save_bill(fsbill)
def scrape_bill_type(self, chamber, session, bill_type, type_abbr):
    """Import all bills of one measure type for ``chamber`` and ``session``.

    ``type_abbr`` selects the ``CABill`` rows by ``measure_type``;
    ``bill_type`` becomes the first entry of each bill's type list.  Each
    bill gets a constructed leginfo source URL.  Every version that has XML
    is added to the bill, and the title, short title, type list and subject
    of the last such version become the bill's own.  Bills without a title
    are skipped with a warning.  Sponsors, actions and votes are then
    attached (vote times localized with ``self._tz``) and the bill is
    passed to ``self.save_bill``.

    Raises ``ScrapeError`` when a vote location starts with neither an
    Assembly nor a Senate prefix.
    """
    chamber_name = "SENATE" if chamber == "upper" else "ASSEMBLY"

    floor_actors = {"Assembly": "lower", "Senate": "upper"}
    # (version attribute, label appended to the type list when it is "Yes")
    version_flags = (
        ("appropriation", "appropriation"),
        ("fiscal_committee", "fiscal committee"),
        ("local_program", "local program"),
        ("urgency", "urgency"),
        ("taxlevy", "tax levy"),
    )
    # (substring of the action text, action type it implies)
    keyword_types = (
        ("To Com", "committee:referred"),
        ("Read third time. Passed.", "bill:passed"),
        ("Approved by Governor", "governor:signed"),
        ("Item veto", "governor:vetoed:line-item"),
    )
    # Motion clean-up steps, applied in this order.  The case-insensitive
    # ones are compiled because re.sub() only takes flags from Python 2.7.
    # NOTE(review): the fifth pattern lacks SJR, unlike its siblings --
    # reproduced as found; confirm whether that is intentional.
    motion_cleanup = (
        (re.compile(r"(\w+)( Extraordinary)? Session$", re.IGNORECASE), ""),
        (re.compile(r"^(Senate|Assembly) ", re.IGNORECASE), ""),
        (re.compile(r"^(SCR|SJR|SB|AB|AJR|ACR)\s?\d+ \w+\.? "), ""),
        (re.compile(r" \(\w+\)$"), ""),
        (re.compile(r"(SCR|SB|AB|AJR|ACR)\s?\d+ \w+\.?$"), ""),
        (re.compile(r"(SCR|SJR|SB|AB|AJR|ACR)\s?\d+ \w+\.? "
                    r"Urgency Clause$"), "(Urgency Clause)"),
        (re.compile(r"\s+"), " "),
    )

    bill_query = self.session.query(CABill)
    bill_query = bill_query.filter_by(session_year=session)
    bill_query = bill_query.filter_by(measure_type=type_abbr)

    for bill in bill_query:
        bill_session = session
        if bill.session_num != "0":
            bill_session += " Special Session %s" % bill.session_num

        bill_id = bill.short_bill_id

        fsbill = Bill(bill_session, chamber, bill_id, "")

        # Construct session for web query, going from '20092010' to '0910'
        source_session = session[2:4] + session[6:8]

        # Turn 'AB 10' into 'ab_10'
        source_num = "%s_%s" % (bill.measure_type.lower(), bill.measure_num)

        # Construct a fake source url
        url_template = ("http://www.leginfo.ca.gov/cgi-bin/postquery?"
                        "bill_number=%s&sess=%s")
        source_url = url_template % (source_num, source_session)
        fsbill.add_source(source_url)

        title = ""
        short_title = ""
        bill_types = ["bill"]
        subject = ""
        # NOTE(review): "!= None" is handed to .filter() as an expression
        # (looks like an ORM column comparison -- confirm); do not rewrite
        # it as "is not None".
        version_query = self.session.query(CABillVersion).filter_by(bill=bill)
        for version in version_query.filter(CABillVersion.bill_xml != None):
            title = version.title
            short_title = version.short_title
            bill_types = [bill_type]
            for attr, label in version_flags:
                if getattr(version, attr) == "Yes":
                    bill_types.append(label)
            subject = version.subject

            fsbill.add_version(
                version.bill_version_id,
                "",
                date=version.bill_version_action_date.date(),
                title=version.title,
                short_title=version.short_title,
                subject=[subject],
                type=bill_types,
            )

        if not title:
            self.warning("Couldn't find title for %s, skipping" % bill_id)
            continue

        fsbill["title"] = title
        fsbill["short_title"] = short_title
        fsbill["type"] = bill_types
        fsbill["subjects"] = [subject]

        # ``version`` is still the last version iterated above.
        for author in version.authors:
            if author.house == chamber_name:
                fsbill.add_sponsor(author.contribution, author.name)

        for action in bill.actions:
            if not action.action:
                # NULL action text seems to be an error on CA's part,
                # unless it has some meaning I'm missing
                continue

            actor = (action.actor or chamber).strip()
            floor = re.match(r"(Assembly|Senate)($| \(Floor)", actor)
            if floor:
                actor = floor_actors[floor.group(1)]
            elif actor.startswith("Governor"):
                actor = "executive"
            else:
                actor = re.sub("^Assembly", "lower", actor)
                actor = re.sub("^Senate", "upper", actor)

            act_str = action.action
            action_types = []
            if act_str.startswith("Introduced"):
                action_types.append("bill:introduced")
            for keyword, label in keyword_types:
                if keyword in act_str:
                    action_types.append(label)
            if not action_types:
                action_types = ["other"]

            fsbill.add_action(actor, act_str, action.action_date.date(),
                              type=action_types)

        for vote in bill.votes:
            result = vote.vote_result == "(PASS)"

            full_loc = vote.location.description
            loc_words = full_loc.split(" ")
            first_part = loc_words[0].lower()
            if first_part in ["asm", "assembly"]:
                vote_chamber = "lower"
            elif first_part.startswith("sen"):
                vote_chamber = "upper"
            else:
                raise ScrapeError("Bad location: %s" % full_loc)
            vote_location = " ".join(loc_words[1:])

            motion = vote.motion.motion_text or ""

            # The vote type is decided on the raw motion text, before any
            # clean-up.
            passage_markers = ("Third Reading", "3rd Reading", "Do Pass")
            if any(marker in motion for marker in passage_markers):
                vtype = "passage"
            else:
                vtype = "other"

            motion = motion.strip()
            for pattern, replacement in motion_cleanup:
                motion = pattern.sub(replacement, motion)

            if not motion:
                self.warning("Got blank motion on vote for %s" % bill_id)
                continue

            fsvote = Vote(
                vote_chamber,
                self._tz.localize(vote.vote_date_time),
                motion,
                result,
                int(vote.ayes),
                int(vote.noes),
                int(vote.abstain),
                threshold=vote.threshold,
                type=vtype,
            )

            if vote_location != "Floor":
                fsvote["committee"] = vote_location

            for record in vote.votes:
                code = record.vote_code
                if code == "AYE":
                    fsvote.yes(record.legislator_name)
                elif code.startswith("NO"):
                    fsvote.no(record.legislator_name)
                else:
                    fsvote.other(record.legislator_name)

            fsbill.add_vote(fsvote)

        self.save_bill(fsbill)