def scrape_vote(self, bill, date, motion, url):
    """Scrape one roll-call page and attach the resulting vote to ``bill``.

    The chamber is inferred from the URL suffix, the tallies are read from
    the centred summary cells ("Yeas: N", ...), and member names come from
    the first table following each ``<h3>`` heading.
    """
    doc = lxml.html.fromstring(self.urlopen(url))
    actor = 'upper' if url.endswith('Senate') else 'lower'

    def tally(label):
        # string() flattens the matching cell; the number is its last token.
        summary = doc.xpath(
            "string(//td[@align = 'center' and contains(., '%s: ')])" % label)
        return int(summary.split()[-1])

    yes_count = tally("Yeas")
    no_count = tally("Nays")
    other_count = tally("Non Voting") + tally("Present")

    # Yeas must outnumber nays and non-voters combined.
    passed = yes_count > no_count + other_count

    vote = Vote(actor, date, motion, passed, yes_count, no_count,
                other_count)
    vote.add_source(url)

    member_path = "//h3[. = '%s']/following-sibling::table[1]/tr/td/a"
    sections = (
        ("Yeas", vote.yes),
        ("Nays", vote.no),
        ("Non Voting", vote.other),
        ("Present", vote.other),
    )
    for heading, record in sections:
        for link in doc.xpath(member_path % heading):
            record(link.text)

    bill.add_vote(vote)
def scrape_vote(self, bill, vote_chamber, bill_id, vote_id, vote_date,
                action_text):
    """Fetch one roll-call result page and add the vote to ``bill``.

    The page lists alternating text cells: a member name followed by that
    member's vote code (Y, N, P or A). Totals printed on the page, when
    present, are checked against the number of names collected.
    """
    url = ('http://alisondb.legislature.state.al.us/Alison/'
           'GetRollCallVoteResults.aspx?'
           'VOTE={0}&BODY={1}&INST={2}&SESS={3}'.format(
               vote_id, vote_chamber, bill_id, self.session_id))
    doc = lxml.html.fromstring(self.get(url=url).text)

    voters = {'Y': [], 'N': [], 'P': [], 'A': []}
    cells = doc.xpath('//table/tr/td/font/text()')
    # Even positions hold names, odd positions hold the matching vote code.
    for name, choice in zip(cells[::2], cells[1::2]):
        if (name.endswith(", Vacant") or name.startswith("Total ")
                or not name.strip()):
            # Vacant seats, summary rows and blank cells are not members.
            continue
        voters[choice].append(name)

    def listed_total(query):
        # Return the integer after the last ':' or None when not listed.
        hits = doc.xpath(query)
        if hits:
            return int(hits[0].split(":")[-1])
        return None

    # Check name counts against totals listed on the site
    total_yea = listed_total('//*[starts-with(text(), "Total Yea")]/text()')
    if total_yea is None:
        total_yea = len(voters['Y'])
    else:
        assert total_yea == len(voters['Y']), "Yea count incorrect"

    total_nay = listed_total('//*[starts-with(text(), "Total Nay")]/text()')
    if total_nay is None:
        total_nay = len(voters['N'])
    else:
        assert total_nay == len(voters['N']), "Nay count incorrect"

    total_absent = listed_total(
        '//*[starts-with(text(), "Total Absent")]/text()')
    if total_absent is not None:
        assert total_absent == len(voters['A']), "Absent count incorrect"

    total_other = len(voters['P']) + len(voters['A'])

    vote = Vote(
        self.CHAMBERS[vote_chamber[0]], vote_date, action_text,
        total_yea > total_nay, total_yea, total_nay, total_other)
    vote.add_source(url)

    ballots = (
        (vote.yes, voters['Y']),
        (vote.no, voters['N']),
        (vote.other, voters['A'] + voters['P']),
    )
    for record, members in ballots:
        for member in members:
            record(member)

    bill.add_vote(vote)
def scrape_bill(self, session, bills):
    """Build, enrich and save the bill described by ``bills[0]``.

    ``bills[0]`` is a ``(billdata, details)`` pair. The bill object is built
    by ``AssemblyBillPage``; companion, summary and votes are then copied
    from ``billdata['data']['bill']`` before the bill is saved.
    """
    billdata, details = bills[0]

    (senate_url, assembly_url, bill_chamber, bill_type, bill_id,
     title, (letter, number, is_amd)) = details

    data = billdata['data']['bill']

    assembly = AssemblyBillPage(self, session, bill_chamber, details)
    assembly.build()
    bill = assembly.bill
    bill.add_source(billdata['url'])

    # Add companion.
    if data['sameAs']:
        bill.add_companion(data['sameAs'])

    if data['summary']:
        bill['summary'] = data['summary']

    if data['votes']:
        for vote_data in data['votes']:
            vote = Vote(
                chamber='upper',
                date=self.date_from_timestamp(vote_data['voteDate']),
                motion=vote_data['description'] or '[No motion available.]',
                passed=False,
                yes_votes=[],
                no_votes=[],
                other_votes=[],
                yes_count=0,
                no_count=0,
                other_count=0)

            for name in vote_data['ayes']:
                vote.yes(name)
                vote['yes_count'] += 1

            for key in ('absent', 'excused', 'abstains'):
                # .get() yields None for a missing/null list; treat that
                # as "nobody" instead of failing on iteration.
                for name in vote_data.get(key) or []:
                    vote.other(name)
                    vote['other_count'] += 1

            for name in vote_data['nays']:
                vote.no(name)
                vote['no_count'] += 1

            vote['passed'] = vote['yes_count'] > vote['no_count']

            bill.add_vote(vote)

    # NOTE: data['previousVersions'] (instances of the same bill from prior
    # sessions) is currently ignored.

    if not data['title']:
        bill['title'] = bill['summary']

    self.save_bill(bill)
def scrape_vote(self, bill, date, motion, url):
    """Scrape a roll-call page into a vote on ``bill``.

    Pages containing the phrase "not yet official" are skipped entirely.
    """
    raw = self.urlopen(url)
    if "not yet official" in raw:
        # Sometimes they link to vote pages before they go live
        return
    doc = lxml.html.fromstring(raw)

    actor = "upper" if url.endswith("Senate") else "lower"

    count_path = "string(//td[@align = 'center' and contains(., '%s: ')])"
    # Each summary cell ends with its number, e.g. "Yeas: 31".
    totals = [
        int(doc.xpath(count_path % label).split()[-1])
        for label in ("Yeas", "Nays", "Non Voting", "Present")
    ]
    yes_count, no_count = totals[0], totals[1]
    other_count = totals[2] + totals[3]

    passed = yes_count > no_count + other_count

    vote = Vote(actor, date, motion, passed, yes_count, no_count,
                other_count)
    vote.add_source(url)

    vote_path = "//h3[. = '%s']/following-sibling::table[1]/tr/td/a"
    recorders = [
        ("Yeas", vote.yes),
        ("Nays", vote.no),
        ("Non Voting", vote.other),
        ("Present", vote.other),
    ]
    for heading, record in recorders:
        for anchor in doc.xpath(vote_path % heading):
            record(anchor.text)

    bill.add_vote(vote)
def parse_vote(self, actor, date, row):
    """
    takes the actor, date and row element and returns a Vote object
    """
    spans = row.xpath('.//span')

    # Strip non-breaking spaces and dashes; fall back to a generic motion.
    motion = row.text.replace(u'\u00a0', " ").replace("-", "").strip()
    if not motion:
        motion = "passage"

    # First span reads "<result>-<yes>-<no>-<other>"; split from the right.
    summary = spans[0].text_content()
    passed, yes_count, no_count, other_count = summary.rsplit('-', 3)

    yes_votes = self.get_names(spans[1].tail)
    no_votes = self.get_names(spans[2].tail)
    other_votes = []
    for extra in spans[3:]:
        if extra.text.startswith(('Absent', 'Excused')):
            other_votes.extend(self.get_names(extra.tail))

    # Translate the textual result into a boolean when it is recognised.
    outcomes = (('adopted', True), ('passed', True), ('failed', False))
    for keyword, verdict in outcomes:
        if keyword in passed.lower():
            passed = verdict
            break

    vote = Vote(actor, date, motion, passed, int(yes_count),
                int(no_count), int(other_count))

    ballots = (
        (vote.yes, yes_votes),
        (vote.no, no_votes),
        (vote.other, other_votes),
    )
    for record, members in ballots:
        for name in members:
            if name and name != 'None':
                record(name)
    return vote
def scrape_vote(self, bill, date, url):
    """Scrape a single vote page and attach the vote to ``bill``.

    The ``hdVote`` header is split on ", ": the second piece names the
    chamber (and optionally a committee), the remainder is the motion.

    Raises:
        ScrapeError: if the header names neither House nor Senate.
    """
    with self.urlopen(url) as page:
        page = lxml.html.fromstring(page)

        header = page.xpath("string(//h4[contains(@id, 'hdVote')])")

        location = header.split(', ')[1]

        if location.startswith('House'):
            chamber = 'lower'
        elif location.startswith('Senate'):
            chamber = 'upper'
        else:
            # Report the text we failed to classify; ``chamber`` is not
            # bound on this path.
            raise ScrapeError("Bad chamber: %s" % location)

        # Anything after the chamber word is the committee name, except
        # the "of Representatives" tail of "House of Representatives".
        committee = ' '.join(location.split(' ')[1:]).strip()
        if not committee or committee.startswith('of Representatives'):
            committee = None

        motion = ', '.join(header.split(', ')[2:]).strip()

        def cell_count(cell_id):
            # Integer content of the summary cell whose id contains cell_id.
            return int(page.xpath(
                "string(//td[contains(@id, '%s')])" % cell_id))

        yes_count = cell_count('tdAyes')
        no_count = cell_count('tdNays')
        excused_count = cell_count('tdExcused')
        absent_count = cell_count('tdAbsent')
        other_count = excused_count + absent_count

        passed = yes_count > no_count

        if motion.startswith('Do Pass'):
            vote_type = 'passage'
        elif motion == 'Concurred in amendments':
            vote_type = 'amendment'
        elif motion == 'Veto override':
            vote_type = 'veto_override'
        else:
            vote_type = 'other'

        vote = Vote(chamber, date, motion, passed, yes_count, no_count,
                    other_count)
        vote['type'] = vote_type

        if committee:
            vote['committee'] = committee

        vote.add_source(url)

        # Each vote cell is preceded by the cell holding the member name.
        for td in page.xpath("//table[contains(@id, 'tblVotes')]/tr/td"):
            if td.text == 'Yea':
                vote.yes(td.getprevious().text.strip())
            elif td.text == 'Nay':
                vote.no(td.getprevious().text.strip())
            elif td.text in ('Excused', 'Absent'):
                vote.other(td.getprevious().text.strip())

        bill.add_vote(vote)
def scrape_vote(self, bill, vote_type_id, vote_type):
    """Scrape the council vote page for ``bill`` and add the vote to it.

    ``vote_type`` is used as the motion text. The "other" count is derived
    by subtracting the yes and no counts from 13.
    """
    base_url = "http://dcclims1.dccouncil.us/lims/voting.aspx?VoteTypeID=%s&LegID=%s"
    url = base_url % (vote_type_id, bill["bill_id"])

    with self.urlopen(url) as html:
        doc = lxml.html.fromstring(html)

        def bold_text(span_id):
            # First bold text node inside the span with the given id.
            return doc.xpath('//span[@id="%s"]/b/text()' % span_id)[0]

        vote_date = convert_date(doc.get_element_by_id("VoteDate").text)

        # check if voice vote / approved boxes have an 'x'
        voice = bold_text("VoteTypeVoice") == "x"
        passed = bold_text("VoteResultApproved") == "x"

        yes_count = extract_int(bold_text("VoteCount1"))
        no_count = extract_int(bold_text("VoteCount2"))
        # every now and then this actually drops below 0 (error in count)
        remaining = 13 - (yes_count + no_count)
        other_count = remaining if remaining > 0 else 0

        vote = Vote("upper", vote_date, vote_type, passed, yes_count,
                    no_count, other_count, voice_vote=voice)
        vote.add_source(url)

        # members are only text on page in a <u> tag
        for underlined in doc.xpath("//u"):
            member = underlined.text
            ballot = underlined.xpath("../../i/text()")[0]
            if "Yes" in ballot:
                record = vote.yes
            elif "No" in ballot:
                record = vote.no
            else:
                record = vote.other
            record(member)

        bill.add_vote(vote)
def apply_votes(self, bill):
    """Given a bill (and assuming it has a status_url in its dict), parse
    all of the votes and add them to the bill.

    Each vote description is expected to end in "-<date>"; descriptions
    without a "-" are logged and skipped.
    """
    bill_votes = votes.all_votes_for_url(self, bill['status_url'])
    for (chamber, vote_desc, pdf_url, these_votes) in bill_votes:
        # str.split()[-1] can never raise, so a missing separator has to
        # be detected explicitly for the warning below to ever fire.
        _, separator, date = vote_desc.rpartition("-")
        if not separator:
            self.warning("[%s] Couldn't get date out of [%s]"
                         % (bill['bill_id'], vote_desc))
            continue

        yes_votes = []
        no_votes = []
        other_votes = []
        for voter, choice in these_votes.items():
            if choice == 'Y':
                yes_votes.append(voter)
            elif choice == 'N':
                no_votes.append(voter)
            else:
                other_votes.append(voter)

        # not necessarily correct, but not sure where else to get it.
        # maybe from pdf
        passed = len(yes_votes) > len(no_votes)

        vote = Vote(standardize_chamber(chamber), date, vote_desc, passed,
                    len(yes_votes), len(no_votes), len(other_votes),
                    pdf_url=pdf_url)
        for voter in yes_votes:
            vote.yes(voter)
        for voter in no_votes:
            vote.no(voter)
        for voter in other_votes:
            vote.other(voter)
        bill.add_vote(vote)
def parse_vote(self, actor, date, row):
    """
    takes the actor, date and row element and returns a Vote object

    The first span holds "<result>-<yes>-<no>-<other>"; the text following
    the second and third spans lists the yes and no voters, and the text
    following a fourth span starting with "Absent" lists the others.
    """
    spans = row.xpath('.//span')
    motion = row.text

    # Split from the right so dashes inside the result text do not break
    # the four-way unpacking.
    passed, yes_count, no_count, other_count = \
        spans[0].text_content().rsplit('-', 3)

    def split_names(tail):
        # Drop the "\xa0--\xa0" separator and empty entries.
        return [name
                for name in tail.replace(u'\xa0--\xa0', '').split(',')
                if name]

    yes_votes = split_names(spans[1].tail)
    no_votes = split_names(spans[2].tail)
    other_votes = []
    # The fourth span is optional and may carry no text of its own.
    if len(spans) > 3 and (spans[3].text or '').startswith('Absent'):
        other_votes = split_names(spans[3].tail)

    # Map the textual result onto a boolean when it is recognised.
    for key, val in (('adopted', True), ('passed', True),
                     ('failed', False)):
        if key in passed.lower():
            passed = val
            break

    vote = Vote(actor, date, motion, passed, int(yes_count),
                int(no_count), int(other_count))
    for name in yes_votes:
        if name and name != 'None':
            vote.yes(name)
    for name in no_votes:
        if name and name != 'None':
            vote.no(name)
    for name in other_votes:
        if name and name != 'None':
            vote.other(name)
    return vote
def scrape(self, chamber, session):
    """Save one hard-coded example bill for ``chamber`` in ``session``.

    The bill carries a source, a version, a document, two sponsors, two
    votes and three actions, all with fixed sample values.
    """
    self.validate_session(session)

    if chamber == 'upper':
        other_chamber, bill_id = 'lower', 'SB 1'
    else:
        other_chamber, bill_id = 'upper', 'HB 1'

    bill = Bill(session, chamber, bill_id, 'A super bill')
    bill.add_source('http://example.com/')
    bill.add_version('As Introduced', 'http://example.com/SB1.html')
    bill.add_document('Google', 'http://google.com')
    bill.add_sponsor('primary', 'Bob Smith')
    bill.add_sponsor('secondary', 'Johnson, Sally')

    first_day = datetime.datetime.strptime('1/29/2010', '%m/%d/%Y')
    upper_vote = Vote('upper', first_day, 'Final passage', True, 2, 0, 0)
    for member in ('Smith', 'Johnson'):
        upper_vote.yes(member)

    second_day = datetime.datetime.strptime('1/30/2010', '%m/%d/%Y')
    lower_vote = Vote('lower', second_day, 'Final passage', False, 0, 1, 1)
    lower_vote.no('Bob Smith')
    lower_vote.other('S. Johnson')

    bill.add_vote(upper_vote)
    bill.add_vote(lower_vote)

    bill.add_action(chamber, 'introduced', first_day)
    bill.add_action(chamber, 'read first time', second_day)
    bill.add_action(other_chamber, 'introduced', second_day)

    self.save_bill(bill)
def parse_vote(self, actor, date, row):
    """
    takes the actor, date and row element and returns a Vote object
    """
    spans = row.xpath('.//span')

    cleaned = row.text.replace(u'\u00a0', " ").replace("-", "").strip()
    # An empty motion text is recorded as a plain "passage" vote.
    motion = cleaned or "passage"

    # "<result>-<yes>-<no>-<other>", split from the right.
    result_text, yes_text, no_text, other_text = \
        spans[0].text_content().rsplit('-', 3)

    yes_votes = self.get_names(spans[1].tail)
    no_votes = self.get_names(spans[2].tail)
    other_votes = []
    for trailing in spans[3:]:
        if trailing.text.startswith(('Absent', 'Excused')):
            other_votes.extend(self.get_names(trailing.tail))

    # Keep the raw text when no known keyword is found in it.
    passed = result_text
    for keyword, verdict in (('adopted', True), ('passed', True),
                             ('failed', False)):
        if keyword in result_text.lower():
            passed = verdict
            break

    vote = Vote(actor, date, motion, passed, int(yes_text), int(no_text),
                int(other_text))

    def add_all(record, members):
        # Skip blanks and the literal placeholder 'None'.
        for member in members:
            if member and member != 'None':
                record(member)

    add_all(vote.yes, yes_votes)
    add_all(vote.no, no_votes)
    add_all(vote.other, other_votes)
    return vote
def build_lower_votes(self):
    """Scrape the Assembly floor-vote page and add each vote to the bill.

    Every ``<table>`` on the page is treated as one vote: its caption
    holds the date and the "YEA/NAY" tallies, and its cells alternate
    between a member name and that member's vote value.
    """
    url = ('http://assembly.state.ny.us/leg/?'
           'default_fld=&bn=%s&term=%s&Votes=Y')
    url = url % (self.bill_id, self.term_start_year)
    self.urls.add(votes=url)
    self.bill.add_source(url)
    doc = self.urls.votes.doc
    if doc is None:
        return

    # Skip the bill when the page has no <pre> block or says so itself.
    pre_blocks = doc.xpath('//pre')
    if not pre_blocks:
        return
    no_votes = ('There are no votes for this bill in this '
                'legislative session.')
    if pre_blocks[0].text_content() == no_votes:
        return

    for table in doc.xpath('//table'):
        date_label = table.xpath('caption/label[contains(., "DATE:")]')
        # The value sits in the element right after its label.
        date = next(date_label[0].itersiblings()).text
        date = datetime.datetime.strptime(date, '%m/%d/%Y')

        tally_label = table.xpath(
            'caption/span/label[contains(., "YEA/NAY:")]')
        tally = next(tally_label[0].itersiblings()).text
        yes_count, no_count = map(int, tally.split('/'))

        passed = yes_count > no_count
        vote = Vote('lower', date, 'Floor Vote', passed, yes_count,
                    no_count, other_count=0)

        # One mapping per vote, so votes do not share each other's names.
        actual_vote = collections.defaultdict(list)

        cells = iter(table.xpath('tr/td/text()'))
        while True:
            pair = list(islice(cells, 2))
            if len(pair) != 2:
                # End of data. Stop.
                break
            name, vote_val = pair
            name = self._scrub_name(name)

            if vote_val.strip() == 'Y':
                vote.yes(name)
            elif vote_val.strip() in ('N', 'NO'):
                vote.no(name)
            else:
                vote.other(name)
                actual_vote[vote_val].append(name)

        # The page doesn't provide an other_count.
        vote['other_count'] = len(vote['other_votes'])
        vote['actual_vote'] = actual_vote
        self.bill.add_vote(vote)
def vote(self):
    '''Return a billy vote.

    Yes and no tallies are summed over the singular and plural count keys;
    every remaining count is reported as "other".
    '''
    actual_vote_dict = self.vote_values()
    date = self.date()
    motion = self.motion()
    passed = self.passed()

    counts = self.get_counts()
    yes_count = int(counts.get('Yea', 0)) + int(counts.get('Yeas', 0))
    no_count = int(counts.get('Nay', 0)) + int(counts.get('Nays', 0))
    everything = sum(int(value) for value in counts.values())
    other_count = everything - (yes_count + no_count)

    vote = Vote(self.chamber, date, motion, passed, yes_count, no_count,
                other_count)

    recorders = {"yes": vote.yes, "no": vote.no, "other": vote.other}
    for choice, members in actual_vote_dict.items():
        record = recorders.get(choice)
        if record is None:
            # Unknown vote categories are ignored.
            continue
        for member in members:
            record(member)

    vote.add_source(self.url)
    return vote
def scrape_vote(self, bill, vote_url, chamber, date):
    """Scrape a vote summary page and add the vote to ``bill``.

    Withdrawn motions are ignored. Pages with neither a FINAL ACTION entry
    nor a "passed without objection" motion are logged and skipped.
    """
    page = self.lxmlize(vote_url)

    motion = page.xpath(
        '//td/b/font[text()="MOTION:"]/../../following-sibling::td/font/text()'
    )[0]

    if 'withdrawn' in motion:
        return

    # Every table row after the one with VOTE in a td/div/b/font
    rolls = page.xpath(
        '//tr[preceding-sibling::tr/td/div/b/font/text()="VOTE"]')

    count_row = rolls[-1]

    def labelled(label):
        # Text of the first <font> following the bold label in count_row.
        return count_row.xpath(
            './/b/font[normalize-space(text())="%s"]'
            '/../following-sibling::font[1]/text()' % label)[0]

    yes_count = labelled("YES:")
    no_count = labelled("NO:")
    exc_count = labelled("EXC:")
    nv_count = labelled("ABS:")

    objection_free = 'passed without objection' in motion.lower()

    final_action = count_row.xpath(
        './/b/font[normalize-space(text())="FINAL ACTION:"]'
        '/../following-sibling::b[1]/font/text()')
    if final_action:
        final = final_action[0]
        passed = 'pass' in final.lower() or int(yes_count) > int(no_count)
    elif objection_free:
        passed = True
        # Count every listed member as a yes.
        yes_count = int(len(rolls[:-2]))
    else:
        self.warning("No vote breakdown found for %s" % vote_url)
        return

    other_count = int(exc_count) + int(nv_count)

    vote = Vote(chamber, date, motion, passed, int(yes_count),
                int(no_count), int(other_count))

    # The last two rows are not member rows.
    for roll in rolls[:-2]:
        voter = roll.xpath('td[2]/div/font')[0].text_content()
        voted = roll.xpath('td[3]/div/font')[0].text_content().strip()
        if not voted:
            if 'passed without objection' in motion.lower() and voter:
                vote.yes(voter)
            continue
        if 'Yes' in voted:
            vote.yes(voter)
        elif 'No' in voted:
            vote.no(voter)
        else:
            vote.other(voter)

    bill.add_vote(vote)
def scrape_vote(self, bill, name, url):
    """Scrape a vote tally page and add the vote to ``bill``.

    ``name`` is used as the motion. URLs containing "VOTE/H" use a
    four-column table layout; all other URLs use a two-column layout.
    Pages containing "BUDGET ADDRESS" are skipped.
    """
    if "VOTE/H" in url:
        vote_chamber = 'lower'
        cols, name_offset, yes_offset, no_offset = (1, 5, 9, 13), 3, 0, 1
    else:
        vote_chamber = 'upper'
        cols, name_offset, yes_offset, no_offset = (1, 6), 4, 1, 2

    html = self.get(url).text
    if 'BUDGET ADDRESS' in html:
        return
    page = lxml.html.fromstring(html)

    def number_in(label):
        # First run of digits in the span that mentions ``label``.
        span_text = page.xpath("string(//span[contains(., '%s')])" % label)
        return int(re.match(r'[^\d]*(\d+)[^\d]*', span_text).group(1))

    yes_count = number_in('Those voting Yea')
    no_count = number_in('Those voting Nay')
    other_count = number_in('Those absent')
    need_count = number_in('Necessary for')

    taken_on = page.xpath("string(//span[contains(., 'Taken on')])")
    day = re.match(r'.*Taken\s+on\s+(\d+/\s?\d+)', taken_on).group(1)
    day = day.replace(' ', '')
    # The page gives only month/day; the year comes from the session.
    date = datetime.datetime.strptime(day + " " + bill['session'],
                                      "%m/%d %Y").date()

    # NOTE(review): strict '>' means a tally exactly equal to the
    # "Necessary for" figure counts as failed -- confirm this is intended.
    vote = Vote(vote_chamber, date, name, yes_count > need_count,
                yes_count, no_count, other_count)
    vote.add_source(url)

    table = page.xpath("//table")[0]
    for row in table.xpath("tr"):
        for start in cols:
            member = row.xpath(
                "string(td[%d])" % (start + name_offset)).strip()
            if not member or member == 'VACANT':
                continue

            if "Y" in row.xpath("string(td[%d])" % (start + yes_offset)):
                vote.yes(member)
            elif "N" in row.xpath("string(td[%d])" % (start + no_offset)):
                vote.no(member)
            else:
                vote.other(member)

    bill.add_vote(vote)
def _build_lower_votes(self):
    """Scrape the Assembly floor-vote page and add each vote to the bill.

    Every ``<table>`` on the page is treated as one vote: its caption
    holds the date and the "YEA/NAY" tallies, and its cells alternate
    between a member name and that member's vote value.
    """
    url = self.shared_url + '&Votes=Y'
    self.urls.add(votes=url)
    self.bill.add_source(url)
    doc = self.urls.votes.doc
    if doc is None:
        return

    # Grab bill information.
    # Skip bill if votes can't be found.
    pre_blocks = doc.xpath('//pre')
    if not pre_blocks:
        return
    no_votes = ('There are no votes for this bill in this legislative '
                'session.')
    if pre_blocks[0].text_content() == no_votes:
        return

    for table in doc.xpath('//table'):
        date_label = table.xpath('caption/label[contains(., "DATE:")]')
        # The value sits in the element right after its label.
        date = next(date_label[0].itersiblings()).text
        date = datetime.datetime.strptime(date, '%m/%d/%Y')

        tally_label = table.xpath(
            'caption/span/label[contains(., "YEA/NAY:")]')
        tally = next(tally_label[0].itersiblings()).text
        yes_count, no_count = map(int, tally.split('/'))

        passed = yes_count > no_count
        vote = Vote('lower', date, 'Floor Vote', passed, yes_count,
                    no_count, other_count=0)

        # One mapping per vote, so votes do not share each other's names.
        actual_vote = collections.defaultdict(list)

        cells = iter(table.xpath('tr/td/text()'))
        while True:
            pair = list(islice(cells, 2))
            if len(pair) != 2:
                # End of data. Stop.
                break
            name, vote_val = pair
            name = self._scrub_name(name)

            if vote_val.strip() == 'Y':
                vote.yes(name)
            elif vote_val.strip() in ('N', 'NO'):
                vote.no(name)
            else:
                vote.other(name)
                actual_vote[vote_val].append(name)

        # The page doesn't provide an other_count.
        vote['other_count'] = len(vote['other_votes'])
        vote['actual_vote'] = actual_vote
        self.bill.add_vote(vote)
def scrape_vote(self, bill, chamber, date, url):
    """Download a roll-call PDF, parse it and add the vote to ``bill``.

    Returns without adding a vote when the converted text has fewer than
    five lines or contains no "Yeas - N" total.
    """
    (path, resp) = self.urlretrieve(url)
    try:
        text = convert_pdf(path, 'text')
    finally:
        # Always discard the downloaded file, even if conversion fails.
        os.remove(path)

    lines = text.split('\n')
    try:
        motion = lines[4].strip()
    except IndexError:
        return

    try:
        yes_count = int(re.search(r'Yeas - (\d+)', text).group(1))
    except AttributeError:
        return

    no_count = int(re.search(r'Nays - (\d+)', text).group(1))
    other_count = int(re.search(r'Not Voting - (\d+)', text).group(1))
    passed = yes_count > (no_count + other_count)

    vote = Vote(chamber, date, motion, passed, yes_count, no_count,
                other_count)
    vote.add_source(url)

    for line in lines[9:]:
        if 'after roll call' in line or 'Indication of Vote' in line:
            break
        if 'Presiding' in line:
            continue

        reached_pair = False
        # Columns are separated by "-<digits>" markers.
        for col in re.split(r'-\d+', line):
            col = col.strip()
            if not col:
                continue

            match = re.match(r'(Y|N|EX|\*)\s+(.+)$', col)
            if not match:
                # No recognisable marker: record the raw text as "other".
                vote.other(col)
                continue

            marker, member = match.group(1), match.group(2)
            if member == "PAIR":
                # Nothing from the PAIR entry onwards is parsed.
                reached_pair = True
                break
            if marker == 'Y':
                vote.yes(member)
            elif marker == 'N':
                vote.no(member)
            else:
                vote.other(member)

        if reached_pair:
            break

    vote.validate()
    bill.add_vote(vote)
def build_lower_votes(self):
    """Scrape the Assembly floor-vote page and add each vote to the bill.

    Every ``<table>`` on the page is treated as one vote: its caption
    holds the date and the "YEA/NAY" tallies, and its cells alternate
    between a member name and that member's vote value.
    """
    url = ("http://assembly.state.ny.us/leg/?"
           "default_fld=&bn=%s&term=%s&Votes=Y")
    url = url % (self.bill_id, self.term_start_year)
    self.urls.add(votes=url)
    self.bill.add_source(url)
    doc = self.urls.votes.doc
    if doc is None:
        return

    # Grab bill information.
    # Skip bill if votes can't be found.
    pre_blocks = doc.xpath("//pre")
    if not pre_blocks:
        return
    no_votes = ("There are no votes for this bill in this legislative "
                "session.")
    if pre_blocks[0].text_content() == no_votes:
        return

    for table in doc.xpath("//table"):
        date_label = table.xpath('caption/label[contains(., "DATE:")]')
        # The value sits in the element right after its label.
        date = next(date_label[0].itersiblings()).text
        date = datetime.datetime.strptime(date, "%m/%d/%Y")

        tally_label = table.xpath(
            'caption/span/label[contains(., "YEA/NAY:")]')
        tally = next(tally_label[0].itersiblings()).text
        yes_count, no_count = map(int, tally.split("/"))

        passed = yes_count > no_count
        vote = Vote("lower", date, "Floor Vote", passed, yes_count,
                    no_count, other_count=0)

        # One mapping per vote, so votes do not share each other's names.
        actual_vote = collections.defaultdict(list)

        cells = iter(table.xpath("tr/td/text()"))
        while True:
            pair = list(islice(cells, 2))
            if len(pair) != 2:
                # End of data. Stop.
                break
            name, vote_val = pair
            name = self._scrub_name(name)

            if vote_val.strip() == "Y":
                vote.yes(name)
            elif vote_val.strip() in ("N", "NO"):
                vote.no(name)
            else:
                vote.other(name)
                actual_vote[vote_val].append(name)

        # The page doesn't provide an other_count.
        vote["other_count"] = len(vote["other_votes"])
        vote["actual_vote"] = actual_vote
        self.bill.add_vote(vote)
def scrape_vote(self, bill, name, url):
    """Scrape a vote tally page and add the vote to ``bill``.

    ``name`` is used as the motion. URLs containing "VOTE/H" use a
    four-column table layout; all other URLs use a two-column layout.
    Pages containing "BUDGET ADDRESS" are skipped.
    """
    is_house = "VOTE/H" in url
    vote_chamber = "lower" if is_house else "upper"
    cols = (1, 5, 9, 13) if is_house else (1, 6)
    name_offset = 3 if is_house else 4
    yes_offset = 0 if is_house else 1
    no_offset = 1 if is_house else 2

    # Connecticut's SSL is causing problems with Scrapelib, so use Requests
    # NOTE(review): verify=False disables certificate checking.
    html = requests.get(url, verify=False).text
    if "BUDGET ADDRESS" in html:
        return
    page = lxml.html.fromstring(html)

    digits = r"[^\d]*(\d+)[^\d]*"

    yea_text = page.xpath("string(//span[contains(., 'Those voting Yea')])")
    yes_count = int(re.match(digits, yea_text).group(1))

    nay_text = page.xpath("string(//span[contains(., 'Those voting Nay')])")
    no_count = int(re.match(digits, nay_text).group(1))

    absent_text = page.xpath("string(//span[contains(., 'Those absent')])")
    other_count = int(re.match(digits, absent_text).group(1))

    needed_text = page.xpath("string(//span[contains(., 'Necessary for')])")
    need_count = int(re.match(digits, needed_text).group(1))

    taken_on = page.xpath("string(//span[contains(., 'Taken on')])")
    day = re.match(r".*Taken\s+on\s+(\d+/\s?\d+)", taken_on).group(1)
    day = day.replace(" ", "")
    # The page gives only month/day; the year comes from the session.
    stamp = datetime.datetime.strptime(day + " " + bill["session"],
                                       "%m/%d %Y")
    date = stamp.date()

    # NOTE(review): strict '>' means a tally exactly equal to the
    # "Necessary for" figure counts as failed -- confirm this is intended.
    passed = yes_count > need_count
    vote = Vote(vote_chamber, date, name, passed, yes_count, no_count,
                other_count)
    vote.add_source(url)

    table = page.xpath("//table")[0]
    for row in table.xpath("tr"):
        for start in cols:
            member = row.xpath(
                "string(td[%d])" % (start + name_offset)).strip()
            if not member or member == "VACANT":
                continue

            if "Y" in row.xpath("string(td[%d])" % (start + yes_offset)):
                vote.yes(member)
            elif "N" in row.xpath("string(td[%d])" % (start + no_offset)):
                vote.no(member)
            else:
                vote.other(member)

    bill.add_vote(vote)
def scrape_bill(self, session, bills):
    """Build, enrich and save the bill described by ``bills[0]``.

    ``bills[0]`` is a ``(billdata, details)`` pair. The bill object is built
    by ``AssemblyBillPage``; companion, summary and votes are then copied
    from ``billdata['data']['bill']`` before the bill is saved.
    """
    billdata, details = bills[0]

    (senate_url, assembly_url, bill_chamber, bill_type, bill_id,
     title, (letter, number, is_amd)) = details

    data = billdata['data']['bill']

    assembly = AssemblyBillPage(self, session, bill_chamber, details)
    assembly.build()
    bill = assembly.bill
    bill.add_source(billdata['url'])

    # Add companion.
    if data['sameAs']:
        bill.add_companion(data['sameAs'])

    if data['summary']:
        bill['summary'] = data['summary']

    # A missing or empty vote list simply yields no iterations.
    for vote_data in data['votes'] or []:
        vote = Vote(
            chamber='upper',
            date=self.date_from_timestamp(vote_data['voteDate']),
            motion=vote_data['description'] or '[No motion available.]',
            passed=False,
            yes_votes=[],
            no_votes=[],
            other_votes=[],
            yes_count=0,
            no_count=0,
            other_count=0)

        for name in vote_data['ayes']:
            vote.yes(name)
            vote['yes_count'] += 1

        for key in ('absent', 'excused', 'abstains'):
            # .get() yields None for a missing/null list; treat that as
            # "nobody" instead of failing on iteration.
            for name in vote_data.get(key) or []:
                vote.other(name)
                vote['other_count'] += 1

        for name in vote_data['nays']:
            vote.no(name)
            vote['no_count'] += 1

        vote['passed'] = vote['yes_count'] > vote['no_count']

        bill.add_vote(vote)

    # NOTE: data['previousVersions'] (instances of the same bill from prior
    # sessions) is currently ignored.

    if not data['title']:
        bill['title'] = bill['summary']

    self.save_bill(bill)
def scrape_bill(self, session, bills):
    """Build, enrich and save the bill described by ``bills[0]``.

    ``bills[0]`` is a ``(billdata, details)`` pair. The bill object is built
    by ``AssemblyBillPage``; companion, summary and votes are then copied
    from ``billdata["data"]["bill"]`` before the bill is saved.
    """
    billdata, details = bills[0]

    (
        senate_url,
        assembly_url,
        bill_chamber,
        bill_type,
        bill_id,
        title,
        (letter, number, is_amd),
    ) = details

    data = billdata["data"]["bill"]

    assembly = AssemblyBillPage(self, session, bill_chamber, details)
    assembly.build()
    bill = assembly.bill
    bill.add_source(billdata["url"])

    # Add companion.
    if data["sameAs"]:
        bill.add_companion(data["sameAs"])

    if data["summary"]:
        bill["summary"] = data["summary"]

    if data["votes"]:
        for vote_data in data["votes"]:
            vote = Vote(
                chamber="upper",
                date=self.date_from_timestamp(vote_data["voteDate"]),
                motion=vote_data["description"] or "[No motion available.]",
                passed=False,
                yes_votes=[],
                no_votes=[],
                other_votes=[],
                yes_count=0,
                no_count=0,
                other_count=0,
            )

            for name in vote_data["ayes"]:
                vote.yes(name)
                vote["yes_count"] += 1

            for key in ("absent", "excused", "abstains"):
                # .get() yields None for a missing/null list; treat that
                # as "nobody" instead of failing on iteration.
                for name in vote_data.get(key) or []:
                    vote.other(name)
                    vote["other_count"] += 1

            for name in vote_data["nays"]:
                vote.no(name)
                vote["no_count"] += 1

            vote["passed"] = vote["yes_count"] > vote["no_count"]

            bill.add_vote(vote)

    # NOTE: data['previousVersions'] (instances of the same bill from prior
    # sessions) is currently ignored.

    if not data["title"]:
        bill["title"] = bill["summary"]

    self.save_bill(bill)
def scrape_vote(self, bill, date, url):
    """Scrape a single vote page and attach the vote to ``bill``.

    The ``hdVote`` header is split on ", ": the second piece names the
    chamber (and optionally a committee), the remainder is the motion.
    Pages without a motion are skipped.

    Raises:
        ScrapeError: if the header names neither House nor Senate.
    """
    with self.urlopen(url) as page:
        page = lxml.html.fromstring(page)

        header = page.xpath("string(//h4[contains(@id, 'hdVote')])")

        location = header.split(", ")[1]

        if location.startswith("House"):
            chamber = "lower"
        elif location.startswith("Senate"):
            chamber = "upper"
        else:
            # Report the text we failed to classify; ``chamber`` is not
            # bound on this path.
            raise ScrapeError("Bad chamber: %s" % location)

        # Anything after the chamber word is the committee name, except
        # the "of Representatives" tail of "House of Representatives".
        committee = " ".join(location.split(" ")[1:]).strip()
        if not committee or committee.startswith("of Representatives"):
            committee = None

        motion = ", ".join(header.split(", ")[2:]).strip()
        if not motion:
            # If we can't detect a motion, skip this vote
            return

        yes_count = int(page.xpath("string(//td[contains(@id, 'tdAyes')])"))
        no_count = int(page.xpath("string(//td[contains(@id, 'tdNays')])"))
        excused_count = int(
            page.xpath("string(//td[contains(@id, 'tdExcused')])"))
        absent_count = int(
            page.xpath("string(//td[contains(@id, 'tdAbsent')])"))
        other_count = excused_count + absent_count

        passed = yes_count > no_count

        if motion.startswith("Do Pass"):
            vote_type = "passage"
        elif motion == "Concurred in amendments":
            vote_type = "amendment"
        elif motion == "Veto override":
            vote_type = "veto_override"
        else:
            vote_type = "other"

        vote = Vote(chamber, date, motion, passed, yes_count, no_count,
                    other_count)
        vote["type"] = vote_type

        if committee:
            vote["committee"] = committee

        vote.add_source(url)

        # Each vote cell is preceded by the cell holding the member name.
        for td in page.xpath("//table[contains(@id, 'tblVotes')]/tr/td"):
            if td.text == "Yea":
                vote.yes(td.getprevious().text.strip())
            elif td.text == "Nay":
                vote.no(td.getprevious().text.strip())
            elif td.text in ("Excused", "Absent"):
                vote.other(td.getprevious().text.strip())

        bill.add_vote(vote)
def scrape_votes(self, bill, link):
    """Parse every vote in the ``lblVoteData`` span of ``link``.

    Each vote is validated and added to ``bill``; the bill is returned.
    A vote whose motion text holds no m/d/yyyy date gets a date of None.
    """
    # Patterns are loop-invariant, so build them once up front.
    vote_split_regex = re.compile(r"\w+? by [\w ]+?\s+-")
    date_regex = re.compile(r"(\d+/\d+/\d+)")
    vote_regex = re.compile(r"\d+$")
    aye_regex = re.compile(r"^.+voting aye were: (.+) -")
    no_regex = re.compile(r"^.+voting no were: (.+) -")
    other_regex = re.compile(r"^.+present and not voting were: (.+) -")
    # Fields within one vote are separated by a run of ten NBSPs.
    field_separator = u"\xa0" * 10

    chamber = "lower" if "ChamberVoting=H" in link else "upper"

    with self.urlopen(link) as page:
        page = lxml.html.fromstring(page)
        raw_vote_data = page.xpath(
            "//span[@id='lblVoteData']")[0].text_content()
        # The text before the first "<word> by <name> -" marker is dropped.
        raw_vote_data = vote_split_regex.split(raw_vote_data.strip())[1:]

        for raw_vote in raw_vote_data:
            raw_vote = raw_vote.split(field_separator)
            motion = raw_vote[0]

            vote_date = date_regex.search(motion)
            if vote_date:
                vote_date = datetime.datetime.strptime(
                    vote_date.group(), "%m/%d/%Y")

            passed = (
                "Passed" in motion
                or "Recommended for passage" in motion
                # Guard the index: a vote may consist of the motion only.
                or (len(raw_vote) > 1 and "Adopted" in raw_vote[1])
            )

            yes_count = 0
            no_count = 0
            other_count = 0
            ayes = []
            nos = []
            others = []

            for v in raw_vote[1:]:
                v = v.strip()
                number = vote_regex.search(v)
                if v.startswith("Ayes...") and number:
                    yes_count = int(number.group())
                elif v.startswith("Noes...") and number:
                    no_count = int(number.group())
                elif v.startswith("Present and not voting...") and number:
                    other_count += int(number.group())
                elif aye_regex.search(v):
                    ayes = aye_regex.search(v).groups()[0].split(", ")
                elif no_regex.search(v):
                    nos = no_regex.search(v).groups()[0].split(", ")
                elif other_regex.search(v):
                    others += other_regex.search(v).groups()[0].split(", ")

            vote = Vote(chamber, vote_date, motion, passed, yes_count,
                        no_count, other_count)
            vote.add_source(link)

            for a in ayes:
                vote.yes(a)
            for n in nos:
                vote.no(n)
            for o in others:
                vote.other(o)

            vote.validate()
            bill.add_vote(vote)

    return bill
def scrape_vote(self, chamber, session, bill_id, vote_url):
    """Scrape a House vote page and save it as a 'lower' chamber vote.

    Nothing is saved when the request was redirected to the "no vote
    found" page, when the motion heading is missing, or when no paragraph
    parses as an m/d/yyyy date.
    """
    NO_VOTE_URL = 'http://www.house.leg.state.mn.us/votes/novotefound.asp'
    resp = self.get(vote_url)
    html = resp.text

    # sometimes the link is broken, will redirect to NO_VOTE_URL
    if resp.url == NO_VOTE_URL:
        return

    doc = lxml.html.fromstring(html)
    headings = doc.xpath("//div[@id='leg_PageContent']/div/h2/text()")
    if not headings:
        self.logger.warning("Bill was missing a motion number, skipping")
        return
    motion = headings[0]

    # Tokens 0 and 3 of the second <h3> hold the yea and nay totals.
    tokens = doc.xpath(
        ".//div[@id='leg_PageContent']/div/h3/text()")[1].split()
    yeas, nays = int(tokens[0]), int(tokens[3])

    # second paragraph has date
    for paragraph in doc.xpath(".//div[@id='leg_PageContent']/div/p/text()"):
        try:
            date = datetime.datetime.strptime(paragraph.strip(), '%m/%d/%Y')
        except ValueError:
            continue
        break
    else:
        self.logger.warning("No date could be found for vote on %s" %
                            motion)
        return

    vote = Vote('lower', date, motion, yeas > nays, yeas, nays, 0,
                session=session, bill_id=bill_id, bill_chamber=chamber)
    vote.add_source(vote_url)

    # first table has YEAs, second table is nays
    for table_xpath, record in (
            ('//table[1]/tr/td/font/text()', vote.yes),
            ('//table[2]/tr/td/font/text()', vote.no)):
        for member in doc.xpath(table_xpath):
            record(member.strip())

    self.save_vote(vote)
def record_votes(root, session):
    """Yield a Vote for every recorded vote found under ``root``.

    A recorded vote is a ``<div>`` starting with "Yeas \u2014" whose
    second preceding sibling holds a sentence of the form
    "<bill> was adopted|passed ... by (Record N): X Yeas, Y Nays,
    Z Present". Divs whose sentence does not match, or whose bill id
    starts with neither "H" nor "S", are skipped.
    """
    pattern = re.compile(
        r'(?P<bill_id>\w+\W+\d+)(,?\W+as\W+amended,?)?\W+was\W+'
        r'(?P<type>adopted|passed'
        r'(\W+to\W+(?P<to>engrossment|third\W+reading))?)\W+'
        r'by\W+\(Record\W+(?P<record>\d+)\):\W+'
        r'(?P<yeas>\d+)\W+Yeas,\W+(?P<nays>\d+)\W+Nays,\W+'
        r'(?P<present>\d+)\W+Present')

    for el in root.xpath(u'//div[starts-with(., "Yeas \u2014")]'):
        text = ''.join(el.getprevious().getprevious().itertext())
        # str.replace returns a new string; keep the result so line
        # breaks do not end up inside the captured bill id.
        text = text.replace('\n', ' ')

        m = pattern.search(text)
        if not m:
            continue

        yes_count = int(m.group('yeas'))
        no_count = int(m.group('nays'))
        other_count = int(m.group('present'))

        bill_id = m.group('bill_id')
        bill_id = bill_id.replace(u'\xa0', ' ')
        # Committee substitutes (CSHB/CSSB) are filed under the base bill.
        bill_id = re.sub(r'CS(SB|HB)', r'\1', bill_id)

        if bill_id.startswith('H'):
            bill_chamber = 'lower'
        elif bill_id.startswith('S'):
            bill_chamber = 'upper'
        else:
            continue

        motion = get_motion(m)

        # Only "adopted"/"passed" sentences match, hence passed=True.
        vote = Vote(None, None, motion, True,
                    yes_count, no_count, other_count)
        vote['bill_id'] = bill_id
        vote['bill_chamber'] = bill_chamber
        vote['session'] = session[0:2]
        vote['method'] = 'record'
        vote['record'] = m.group('record')
        vote['type'] = get_type(motion)

        for name in names(el):
            vote.yes(name)

        el = next_tag(el)
        if el.text and el.text.startswith('Nays'):
            for name in names(el):
                vote.no(name)
            el = next_tag(el)

        while el.text and re.match(r'Present|Absent', el.text):
            for name in names(el):
                vote.other(name)
            el = next_tag(el)

        # Present and absent members are both recorded as "other", so the
        # count is taken from the collected names.
        vote['other_count'] = len(vote['other_votes'])
        yield vote
def parse_vote(self, bill, vote_date, vote_chamber, vote_status, vote_url):
    """Download a vote document, convert it to text and attach the vote.

    The document at ``vote_url`` is retrieved to a local file, converted with
    the external ``abiword`` program into ``/tmp/ksvote.txt`` and scanned
    line by line for a totals line and the member lists.

    Args:
        bill: Bill object that receives the vote via ``add_vote``.
        vote_date: Date string in ``'%a %d %b %Y'`` format.
        vote_chamber: ``'Senate'`` maps to ``'upper'``; anything else to
            ``'lower'``.
        vote_status: Used as the vote's motion text.
        vote_url: URL of the vote document; also recorded as the source.

    Raises:
        subprocess.CalledProcessError: If the conversion command fails.
        AttributeError: If a non-empty member list appears before any totals
            line, because no vote object exists yet.
    """
    vote_chamber = 'upper' if vote_chamber == 'Senate' else 'lower'
    vote_date = datetime.datetime.strptime(vote_date, '%a %d %b %Y')

    vote_doc, resp = self.urlretrieve(vote_url)
    try:
        # An argument list (no shell) keeps the downloaded file's path from
        # being re-parsed by a shell.
        subprocess.check_call(['abiword', '--to=ksvote.txt', vote_doc],
                              cwd='/tmp/')
        # Context manager so the converted file is always closed.
        with open('/tmp/ksvote.txt') as converted:
            vote_lines = converted.readlines()
    finally:
        # Remove the download even when conversion or reading fails.
        os.remove(vote_doc)

    totals_re = re.compile(
        r'Yeas (\d+)[;,] Nays (\d+)[;,] '
        r'(?:Present but not voting:|Present and Passing) (\d+)[;,] '
        r'(?:Absent or not voting:|Absent or Not Voting) (\d+)')

    def members(text):
        # Names after the first colon, minus the "None." placeholder.
        listed = text.split(':', 1)[1].strip().split(', ')
        return [member for member in listed if member != 'None.']

    vote = None
    passed = True  # assumed passed unless the text says otherwise
    for line in vote_lines:
        line = line.strip()
        totals = totals_re.findall(line)
        if totals:
            yeas, nays, nv, absent = [int(count) for count in totals[0]]
            vote = Vote(vote_chamber, vote_date, vote_status, True,
                        yeas, nays, nv + absent)
        elif line.startswith('Yeas:'):
            for member in members(line):
                vote.yes(member)
        elif line.startswith('Nays:'):
            for member in members(line):
                vote.no(member)
        elif line.startswith('Present ') or line.startswith('Absent or'):
            for member in members(line):
                vote.other(member)
        elif 'the motion did not prevail' in line:
            passed = False

    if vote:
        vote['passed'] = passed
        vote.add_source(vote_url)
        bill.add_vote(vote)
def scrape_vote(self, bill, vote_url, chamber, date):
    """Scrape a vote summary page and attach the resulting vote to ``bill``.

    Args:
        bill: Bill object that receives the vote via ``add_vote``.
        vote_url: URL of the vote summary page.
        chamber: Chamber value passed through to ``Vote``.
        date: Date value passed through to ``Vote``.

    Returns:
        None. No vote is added when the MOTION cell is missing, when the
        motion text contains "withdrawn", or when the page has neither a
        FINAL ACTION entry nor a "passed without objection" motion.
    """
    page = self.lxmlize(vote_url)

    try:
        motion = page.xpath(
            '//td/b/font[text()="MOTION:"]/../../following-sibling::td/font/text()')[0]
    except IndexError:
        # Only a missing MOTION cell is expected here; a bare ``except``
        # would also swallow KeyboardInterrupt and genuine bugs.
        self.warning("Vote Summary Page Broken ")
        return

    if 'withdrawn' in motion:
        return

    # Every table row after the one with VOTE in a td/div/b/font
    rolls = page.xpath(
        '//tr[preceding-sibling::tr/td/div/b/font/text()="VOTE"]')
    count_row = rolls[-1]

    def labelled_count(label):
        # Text of the <font> that follows the bold label in the count row.
        return count_row.xpath(
            './/b/font[normalize-space(text())="%s"]'
            '/../following-sibling::font[1]/text()' % label)[0]

    yes_count = labelled_count('YES:')
    no_count = labelled_count('NO:')
    exc_count = labelled_count('EXC:')
    nv_count = labelled_count('ABS:')

    without_objection = 'passed without objection' in motion.lower()

    # Evaluate the FINAL ACTION lookup once instead of twice.
    final_action = count_row.xpath(
        './/b/font[normalize-space(text())="FINAL ACTION:"]'
        '/../following-sibling::b[1]/font/text()')
    if final_action:
        passed = ('pass' in final_action[0].lower() or
                  int(yes_count) > int(no_count))
    elif without_objection:
        passed = True
        # No tally is given: every listed member counts as a yes.
        yes_count = len(rolls[:-2])
    else:
        self.warning("No vote breakdown found for %s" % vote_url)
        return

    other_count = int(exc_count) + int(nv_count)
    vote = Vote(chamber, date, motion, passed,
                int(yes_count), int(no_count), int(other_count))

    # The last two rows are not member rows (the final one is the count row).
    for roll in rolls[:-2]:
        voter = roll.xpath('td[2]/div/font')[0].text_content()
        voted = roll.xpath('td[3]/div/font')[0].text_content().strip()
        if voted:
            if 'Yes' in voted:
                vote.yes(voter)
            elif 'No' in voted:
                vote.no(voter)
            else:
                vote.other(voter)
        elif without_objection and voter:
            vote.yes(voter)

    bill.add_vote(vote)
def scrape_votes_old(self, bill, billname, session):
    """Scrape archived roll calls for one bill and add them to ``bill``.

    One vote is created per journal link (an ``<a>`` whose href contains
    ``JournalText``). The link text supplies the date, the neighbouring cell
    supplies "<chamber> - <motion>", and the next table row supplies the yea
    and nay name lists. Counts are the lengths of those lists.

    Raises:
        ScrapeError: If the chamber label is neither "House" nor "Senate".
    """
    vote_url = ('http://archives.legislature.state.oh.us/bills.cfm?ID=' +
                session + '_' + billname)
    doc = lxml.html.fromstring(self.get(vote_url).text)

    chamber_codes = {'House': 'lower', 'Senate': 'upper'}

    def listed_names(row, marker):
        # Non-empty cell texts inside the div whose id contains ``marker``.
        div = row.xpath("td/font/div[contains(@id, '%s')]" % marker)[0]
        texts = [td.xpath("string()") for td in div.xpath("table/tr/td")]
        return [text for text in texts if text]

    for jlink in doc.xpath("//a[contains(@href, 'JournalText')]"):
        date = datetime.datetime.strptime(jlink.text, "%m/%d/%Y").date()

        details = jlink.xpath("string(../../../td[2])")
        label = details.split(" - ")[0]
        chamber = chamber_codes.get(label)
        if chamber is None:
            raise ScrapeError("Bad chamber: %s" % label)

        motion = details.split(" - ")[1].split("\n")[0].strip()

        vote_row = jlink.xpath("../../..")[0].getnext()
        yeas = listed_names(vote_row, 'Yea')
        nays = listed_names(vote_row, 'Nay')

        yes_count, no_count = len(yeas), len(nays)
        vote = Vote(chamber, date, motion, yes_count > no_count,
                    yes_count, no_count, 0)

        for voter in yeas:
            vote.yes(voter)
        for voter in nays:
            vote.no(voter)

        vote.add_source(vote_url)
        bill.add_vote(vote)
def record_votes(root, session):
    """Generate ``Vote`` objects for the recorded votes found in ``root``.

    A roll call starts at each ``<div>`` whose text begins with
    "Yeas \u2014". The sentence announcing the result is read from the
    element two siblings earlier and must match the record-vote pattern;
    otherwise the roll call is ignored. Bill ids that begin with neither
    ``H`` nor ``S`` are ignored as well.

    Args:
        root: Parsed document exposing ``xpath``.
        session: Session string; only ``session[0:2]`` is stored.

    Yields:
        One ``Vote`` per matching announcement, with voter names attached
        and ``other_count`` set to the number of "other" names collected.
    """
    # All fragments are raw strings; the original left several as plain
    # literals containing "\W", "\d" and "\(".
    announcement = re.compile(
        r'(?P<bill_id>\w+\W+\d+)(,?\W+as\W+amended,?)?\W+was\W+'
        r'(?P<type>adopted|passed'
        r'(\W+to\W+(?P<to>engrossment|third\W+reading))?)\W+'
        r'by\W+\(Record\W+(?P<record>\d+)\):\W+'
        r'(?P<yeas>\d+)\W+Yeas,\W+(?P<nays>\d+)\W+Nays,\W+'
        r'(?P<present>\d+)\W+Present')
    other_heading = re.compile(r'Present|Absent')

    for yeas_el in root.xpath(u'//div[starts-with(., "Yeas \u2014")]'):
        sentence = ''.join(yeas_el.getprevious().getprevious().itertext())
        # The original called replace() without keeping its result, which
        # made the newline normalisation a no-op.
        sentence = sentence.replace('\n', ' ')

        m = announcement.search(sentence)
        if m is None:
            continue

        bill_id = m.group('bill_id').replace(u'\xa0', ' ')
        # "CSHB"/"CSSB" ids are reduced to "HB"/"SB".
        bill_id = re.sub(r'CS(SB|HB)', r'\1', bill_id)

        if bill_id.startswith('H'):
            bill_chamber = 'lower'
        elif bill_id.startswith('S'):
            bill_chamber = 'upper'
        else:
            continue

        motion = get_motion(m)
        vote = Vote(None, None, motion, True,
                    int(m.group('yeas')),
                    int(m.group('nays')),
                    int(m.group('present')))
        vote['bill_id'] = bill_id
        vote['bill_chamber'] = bill_chamber
        vote['session'] = session[0:2]
        vote['method'] = 'record'
        vote['record'] = m.group('record')
        vote['type'] = get_type(motion)

        for name in names(yeas_el):
            vote.yes(name)

        # A dedicated cursor walks the sections after the yeas, leaving the
        # loop variable untouched.
        section = next_tag(yeas_el)
        if section.text and section.text.startswith('Nays'):
            for name in names(section):
                vote.no(name)
            section = next_tag(section)

        while section.text and other_heading.match(section.text):
            for name in names(section):
                vote.other(name)
            section = next_tag(section)

        vote['other_count'] = len(vote['other_votes'])
        yield vote
def scrape_votes(self, bill, bill_prefix, number, session):
    """Scrape the roll calls listed for one bill and add them to ``bill``.

    The votes page URL is built from ``session``, ``bill_prefix`` and
    ``number``. Each journal link on it (href containing ``JournalText``)
    becomes one vote: the link text is the date, the sibling cell holds
    "<chamber> - <motion>", and the following row holds the yea and nay
    names. Counts are derived from the number of names found.

    Raises:
        ScrapeError: If the chamber label is neither "House" nor "Senate".
    """
    vote_url = ('http://www.legislature.state.oh.us/votes.cfm?ID=' +
                session + '_' + bill_prefix + '_' + str(number))
    page = lxml.html.fromstring(self.urlopen(vote_url))

    def collect(row, id_fragment):
        # Gather the non-empty cell strings from the matching div's table.
        container = row.xpath(
            "td/font/div[contains(@id, '%s')]" % id_fragment)[0]
        found = []
        for cell in container.xpath("table/tr/td"):
            text = cell.xpath("string()")
            if text:
                found.append(text)
        return found

    for jlink in page.xpath("//a[contains(@href, 'JournalText')]"):
        date = datetime.datetime.strptime(jlink.text, "%m/%d/%Y").date()
        details = jlink.xpath("string(../../../td[2])")

        chamber_label = details.split(" - ")[0]
        if chamber_label == 'House':
            chamber = 'lower'
        elif chamber_label == 'Senate':
            chamber = 'upper'
        else:
            raise ScrapeError("Bad chamber: %s" % chamber_label)

        motion = details.split(" - ")[1].split("\n")[0].strip()

        vote_row = jlink.xpath("../../..")[0].getnext()
        yeas = collect(vote_row, 'Yea')
        nays = collect(vote_row, 'Nay')

        vote = Vote(chamber, date, motion, len(yeas) > len(nays),
                    len(yeas), len(nays), 0)
        for member in yeas:
            vote.yes(member)
        for member in nays:
            vote.no(member)

        vote.add_source(vote_url)
        bill.add_vote(vote)
def scrape_votes(self, bill, votes_url):
    """Scrape the vote-history PDFs linked from ``votes_url``.

    Every link whose href contains ``votehistory`` is downloaded, converted
    to text with ``convert_pdf`` and scanned for "<code>  <name>" pairs. The
    vote's counts and ``passed`` flag are derived from the names found, not
    from totals printed in the PDF.

    Args:
        bill: Bill object that receives each vote via ``add_vote``.
        votes_url: URL of the page listing the vote-history links.
    """
    html = self.urlopen(votes_url)
    doc = lxml.html.fromstring(html)
    doc.make_links_absolute(votes_url)

    # vote indicator, a few spaces, a name, newline or multiple spaces
    vote_re = re.compile(r'(Y|N|E|NV|A|P|-)\s{2,5}(\w.+?)(?:\n|\s{2})')

    for link in doc.xpath('//a[contains(@href, "votehistory")]'):
        # Link text is split on " - "; the last piece is the date and, when
        # there are exactly three pieces, the middle one is the motion.
        pieces = link.text.split(' - ')
        date = pieces[-1]
        motion = pieces[1] if len(pieces) == 3 else 'Third Reading'

        chamber = link.xpath('../following-sibling::td/text()')[0]
        if chamber == 'HOUSE':
            chamber = 'lower'
        elif chamber == 'SENATE':
            chamber = 'upper'
        else:
            # Unknown labels are reported but still stored as-is.
            self.warning('unknown chamber %s' % chamber)

        date = datetime.datetime.strptime(date, "%A, %B %d, %Y")

        # download the file
        href = link.get('href')
        fname, resp = self.urlretrieve(href)
        try:
            pdflines = convert_pdf(fname, 'text').splitlines()
        finally:
            # Remove the temporary download even if conversion fails.
            os.remove(fname)

        vote = Vote(chamber, date, motion.strip(), False, 0, 0, 0)

        for line in pdflines:
            for vcode, name in vote_re.findall(line):
                if vcode == 'Y':
                    vote.yes(name)
                elif vcode == 'N':
                    vote.no(name)
                else:
                    vote.other(name)

        # fake the counts
        vote['yes_count'] = len(vote['yes_votes'])
        vote['no_count'] = len(vote['no_votes'])
        vote['other_count'] = len(vote['other_votes'])
        vote['passed'] = vote['yes_count'] > vote['no_count']
        vote.add_source(href)
        bill.add_vote(vote)
def parse_vote(self, bill, vote_date, vote_chamber, vote_status, vote_url):
    """Fetch a vote document, convert it to text and record the vote.

    The file retrieved from ``vote_url`` is converted by the external
    ``abiword`` program (run in ``/tmp/``, writing ``ksvote.txt``). The text
    is then read line by line: a totals line creates the vote, the
    "Yeas:", "Nays:", "Present ..." and "Absent or ..." lines add member
    names, and a line containing "the motion did not prevail" marks the vote
    as failed.

    Args:
        bill: Bill object that receives the vote via ``add_vote``.
        vote_date: Date string in ``'%a %d %b %Y'`` format.
        vote_chamber: ``'Senate'`` becomes ``'upper'``; any other value
            becomes ``'lower'``.
        vote_status: Stored as the vote's motion.
        vote_url: Location of the vote document; recorded as the source.

    Raises:
        subprocess.CalledProcessError: If the conversion command fails.
        AttributeError: If member names are listed before a totals line has
            been seen (there is no vote to attach them to).
    """
    vote_chamber = 'upper' if vote_chamber == 'Senate' else 'lower'
    vote_date = datetime.datetime.strptime(vote_date, '%a %d %b %Y')

    vote_doc, resp = self.urlretrieve(vote_url)
    try:
        # Pass arguments as a list rather than interpolating the file path
        # into a shell command line.
        subprocess.check_call(['abiword', '--to=ksvote.txt', vote_doc],
                              cwd='/tmp/')
        # Close the converted file deterministically.
        with open('/tmp/ksvote.txt') as text_file:
            vote_lines = text_file.readlines()
    finally:
        # The download is removed whether or not conversion succeeded.
        os.remove(vote_doc)

    totals_pattern = re.compile(
        r'Yeas (\d+)[;,] Nays (\d+)[;,] '
        r'(?:Present but not voting:|Present and Passing) (\d+)[;,] '
        r'(?:Absent or not voting:|Absent or Not Voting) (\d+)')

    def listed_members(text):
        # Everything after the first colon, comma-separated; "None." is a
        # placeholder for an empty list.
        names = text.split(':', 1)[1].strip().split(', ')
        return [name for name in names if name != 'None.']

    vote = None
    passed = True
    for raw_line in vote_lines:
        line = raw_line.strip()
        totals = totals_pattern.findall(line)
        if totals:
            yeas, nays, nv, absent = (int(value) for value in totals[0])
            # default passed to true
            vote = Vote(vote_chamber, vote_date, vote_status, True,
                        yeas, nays, nv + absent)
        elif line.startswith('Yeas:'):
            for member in listed_members(line):
                vote.yes(member)
        elif line.startswith('Nays:'):
            for member in listed_members(line):
                vote.no(member)
        elif line.startswith('Present '):
            for member in listed_members(line):
                vote.other(member)
        elif line.startswith('Absent or'):
            for member in listed_members(line):
                vote.other(member)
        elif 'the motion did not prevail' in line:
            passed = False

    if vote:
        vote['passed'] = passed
        vote.add_source(vote_url)
        bill.add_vote(vote)
def parse_roll_call(self, url, chamber, date):
    """Parse a roll-call page into a ``Vote`` and return it.

    The motion is read from the fourth ``font8text`` div, falling back to
    the tail of its next sibling and then to the fifth div. "FP" is expanded
    to "FINAL PASSAGE". The vote type is ``passage`` for final passage,
    ``amendment`` for concurrence in amendments and ``other`` otherwise.
    A vote passes only when the yeas exceed nays plus leave/not-voting.

    Args:
        url: Roll-call page URL.
        chamber: Chamber value passed through to ``Vote``.
        date: Date value passed through to ``Vote``.

    Returns:
        The populated ``Vote``.
    """
    with self.urlopen(url) as page:
        page = lxml.html.fromstring(page)

        divs = page.xpath("//div[@class='font8text']")
        motion = divs[3].text.strip()
        if not motion:
            try:
                motion = divs[3].getnext().tail.strip()
            except AttributeError:
                motion = divs[4].text.strip()

        if motion == 'FP':
            motion = 'FINAL PASSAGE'

        if motion == 'FINAL PASSAGE':
            vote_type = 'passage'
        elif re.match(r'CONCUR(RENCE)? IN \w+ AMENDMENTS', motion):
            vote_type = 'amendment'
        else:
            vote_type = 'other'

        motion = motion or 'Unknown'

        def tally(label):
            # The count sits in the element right after the label div.
            heading = page.xpath("//div[text() = '%s']" % label)[0]
            return int(heading.getnext().text)

        yeas = tally('YEAS')
        nays = tally('NAYS')
        other = tally('LVE') + tally('N/V')

        vote = Vote(chamber, date, motion, yeas > (nays + other),
                    yeas, nays, other, type=vote_type)

        markers = page.xpath("//span[text() = 'Y' or text() = 'N'"
                             "or text() = 'X' or text() = 'E']")
        for marker in markers:
            member = marker.getnext().text.strip()
            if marker.text == 'Y':
                vote.yes(member)
            elif marker.text == 'N':
                vote.no(member)
            else:
                vote.other(member)

        return vote
def scrape_chamber_votes(self, chamber, session, url):
    """Save every vote in the XML feed at ``url`` that belongs to ``chamber``.

    Each ``<vote>`` element is classified by the first word of its
    ``legislation`` attribute: SB/SR are upper-chamber bills, HB/HR are
    lower-chamber bills, and empty/EX/ELECTION entries are skipped. Votes on
    bills of the other chamber are skipped too.

    Raises:
        Exception: If the legislation prefix is not one of the known values.
    """
    doc = lxml.etree.fromstring(self.urlopen(url))

    prefix_chambers = {'SB': 'upper', 'SR': 'upper',
                       'HB': 'lower', 'HR': 'lower'}
    ignored_prefixes = ('', 'EX', 'ELECTION')

    for vxml in doc.xpath('//vote'):
        legislation = vxml.get('legislation')
        motion = vxml.get('caption')
        timestamp = datetime.datetime.strptime(vxml.get('dateTime'),
                                               '%Y-%m-%dT%H:%M:%S')

        leg_prefix = legislation.split(' ')[0]
        if leg_prefix in ignored_prefixes:
            continue
        if leg_prefix not in prefix_chambers:
            raise Exception('unknown legislation prefix: ' + legislation)
        bill_chamber = prefix_chambers[leg_prefix]

        # skip bills from other chamber
        if bill_chamber != chamber:
            continue

        def total(attribute):
            # Integer value of one attribute of the <totals> child.
            return int(vxml.xpath('totals/@' + attribute)[0])

        other_count = (total('unknown') + total('excused') +
                       total('not-voting'))
        no_count = total('nays')
        yes_count = total('yeas')

        vote = Vote(chamber, timestamp, motion,
                    passed=yes_count > no_count,
                    yes_count=yes_count,
                    no_count=no_count,
                    other_count=other_count,
                    session=session,
                    bill_id=legislation,
                    bill_chamber=bill_chamber)
        vote.add_source(url)

        for member_el in vxml.xpath('member'):
            letter = member_el.get('vote')
            member = member_el.get('name')
            if letter == 'Y':
                vote.yes(member)
            elif letter == 'N':
                vote.no(member)
            else:
                vote.other(member)

        self.save_vote(vote)
def get_lower_votes(self):
    """Scrape the floor votes for ``self.bill_id`` and add them to the bill.

    The votes page is fetched with ``self.url2lxml``. Each ``<table>`` on it
    becomes one lower-chamber "Floor Vote": the caption supplies the date
    and the "YEA/NAY" totals, and the table cells are read as alternating
    (name, vote) pairs. The page gives no count for other votes, so
    ``other_count`` is set from the names collected. The raw vote value of
    every member is also kept under ``vote['actual_vote']``.

    Returns:
        None. Returns early when the page could not be loaded or when it
        reports that there are no votes for the bill.
    """
    url = ('http://assembly.state.ny.us/leg/?'
           'default_fld=&bn=%s&term=%s&Votes=Y')
    url = url % (self.bill_id, self.term_start_year)
    doc = self.url2lxml(url)
    if doc is None:
        return

    pre = doc.xpath('//pre')[0].text_content()
    no_votes = ('There are no votes for this bill in this '
                'legislative session.')
    if pre == no_votes:
        return

    for table in doc.xpath('//table'):
        # The value follows its label as the next sibling. The builtin
        # next() is used instead of the Python 2-only ``.next()`` method.
        date_label = table.xpath('caption/label[contains(., "DATE:")]')
        date = next(date_label[0].itersiblings()).text
        date = datetime.datetime.strptime(date, '%m/%d/%Y')

        tally_label = table.xpath(
            'caption/span/label[contains(., "YEA/NAY:")]')
        tally = next(tally_label[0].itersiblings()).text
        yes_count, no_count = map(int, tally.split('/'))
        passed = yes_count > no_count

        vote = Vote('lower', date, 'Floor Vote', passed, yes_count,
                    no_count, other_count=0)

        # One mapping per vote. The original created a single mapping before
        # the loop, so every vote shared it and accumulated the names of all
        # earlier tables.
        actual_vote = collections.defaultdict(list)

        # Cells alternate name, vote value; a trailing unpaired cell is
        # ignored.
        tds = table.xpath('tr/td/text()')
        for name, vote_val in zip(tds[::2], tds[1::2]):
            name = self._scrub_name(name)

            if vote_val.strip() == 'Y':
                vote.yes(name)
            elif vote_val.strip() in ('N', 'NO'):
                vote.no(name)
            else:
                vote.other(name)

            actual_vote[vote_val].append(name)

        # The page doesn't provide an other_count.
        vote['other_count'] = len(vote['other_votes'])
        vote['actual_vote'] = actual_vote
        self.bill.add_vote(vote)
def scrape_vote(self, bill, motion, url):
    """Scrape one roll-call page and attach the vote to ``bill``.

    Counts, date and outcome are read from label/value cell pairs; the
    chamber is inferred from ``chamber=House`` / ``chamber=Senate`` in the
    URL. The vote passes when the outcome cell reads ``PREVAILS``. Member
    rows are taken from the table containing the "Member" header cell:
    ``Y`` and ``N`` are yes/no, ``X`` and ``E`` are recorded as other, and
    any other code is ignored.

    Args:
        bill: Bill object that receives the vote via ``add_vote``.
        motion: Motion text for the vote.
        url: Roll-call page URL; also recorded as the source.
    """
    page = lxml.html.fromstring(self.get(url, retry_on_404=True).text)

    def value_after(label):
        # String value of the cell(s) following the cell labelled ``label``.
        cell = page.xpath("//td[text() = '%s']" % label)[0]
        return cell.xpath("string(following-sibling::td)")

    yes_count = int(value_after('Yeas (Y):'))
    no_count = int(value_after('Nays (N):'))
    abs_count = int(value_after('Absent (X):'))
    ex_count = int(value_after('Excused (E):'))
    other_count = abs_count + ex_count

    if 'chamber=House' in url:
        chamber = 'lower'
    elif 'chamber=Senate' in url:
        chamber = 'upper'

    date_text = value_after('Date:')
    try:
        date = datetime.datetime.strptime(date_text, "%B %d, %Y")
    except ValueError:
        # Abbreviated month names carry a trailing period.
        date = datetime.datetime.strptime(date_text, "%b. %d, %Y")

    outcome_cell = page.xpath("//td[text()='Outcome:']")[0]
    outcome = outcome_cell.xpath("string(following-sibling::td)")

    vote = Vote(chamber, date, motion, outcome == 'PREVAILS',
                yes_count, no_count, other_count)
    vote.add_source(url)

    header_cell = page.xpath("//td[text() = 'Member']")[0]
    # Skip the header row itself.
    for row in header_cell.xpath("../../tr")[1:]:
        name = row.xpath("string(td[2])")
        vtype = row.xpath("string(td[4])")
        if vtype == 'Y':
            vote.yes(name)
        elif vtype == 'N':
            vote.no(name)
        elif vtype in ('X', 'E'):
            vote.other(name)

    bill.add_vote(vote)
def _parse_senate_votes(self, vote_data):
    """Build an upper-chamber ``Vote`` from one vote record.

    Args:
        vote_data: Mapping with ``voteDate`` (``YYYY-MM-DD``), ``voteType``
            (``FLOOR`` or ``COMMITTEE``), ``memberVotes`` and, for committee
            votes, ``committee``.

    Returns:
        The ``Vote``. ``AYE`` and ``AYEWR`` members count as yes, ``NAY`` as
        no, and ``EXC``/``ABS``/``ABD`` as other; counts are incremented per
        member and ``passed`` is true when yes votes outnumber no votes.

    Raises:
        ValueError: If ``voteType`` is neither ``FLOOR`` nor ``COMMITTEE``.
    """
    vote_day = datetime.datetime.strptime(
        vote_data['voteDate'], '%Y-%m-%d').date()

    vote = Vote(chamber='upper',
                date=vote_day,
                motion='[No motion available.]',
                passed=False,
                yes_votes=[],
                no_votes=[],
                other_votes=[],
                yes_count=0,
                no_count=0,
                other_count=0)

    vote_type = vote_data['voteType']
    if vote_type == 'FLOOR':
        vote['motion'] = 'Floor Vote'
    elif vote_type == 'COMMITTEE':
        vote['motion'] = '{} Vote'.format(vote_data['committee']['name'])
    else:
        raise ValueError('Unknown vote type encountered.')

    vote_rolls = vote_data['memberVotes']['items']

    # Yea and nay codes are only read when the roll has an 'items' entry.
    counted_rolls = (
        ('AYE', vote.yes, 'yes_count'),
        ('AYEWR', vote.yes, 'yes_count'),
        ('NAY', vote.no, 'no_count'),
    )
    for code, record, counter in counted_rolls:
        if 'items' not in vote_rolls.get(code, {}):
            continue
        for legislator in vote_rolls[code]['items']:
            record(legislator['fullName'])
            vote[counter] += 1

    # All remaining codes are lumped together as "other".
    for code in ('EXC', 'ABS', 'ABD'):
        if not vote_rolls.get(code, []):
            continue
        for legislator in vote_rolls[code]['items']:
            vote.other(legislator['fullName'])
            vote['other_count'] += 1

    vote['passed'] = vote['yes_count'] > vote['no_count']
    return vote
def parse_vote(self, bill, actor, date, motion, url, uniqid):
    """Parse a plain-text roll call and attach the vote to ``bill``.

    The page text is searched for "YEAS n ... NAYS n ... ABSENT n ..." and
    the text between the headings is split on runs of two or more
    whitespace characters to obtain the member names.

    Args:
        bill: Bill object; receives ``url`` as a source and the vote.
        actor: ``'upper'`` or ``'lower'`` is used as the vote chamber; any
            other value is stored as the vote's ``location`` instead.
        date: Date value passed through to ``Vote``.
        motion: Motion text for the vote.
        url: Roll-call URL; recorded as a source on both bill and vote.
        uniqid: Stored on the vote as ``_vote_id``.

    Raises:
        AttributeError: If the page text does not match the roll-call
            pattern (the search returns ``None``).
    """
    page = self.get(url).text
    bill.add_source(url)

    # Raw literals: "\s" and "\d" are regex escapes, not string escapes.
    vote_re = re.compile(
        r"YEAS -?\s?(\d+)(.*)NAYS -?\s?(\d+)"
        r"(.*)ABSENT( OR NOT VOTING)? -?\s?"
        r"(\d+)(.*)",
        re.MULTILINE | re.DOTALL,
    )
    match = vote_re.search(page)
    yes_count = int(match.group(1))
    no_count = int(match.group(3))
    other_count = int(match.group(6))

    passed = yes_count > no_count

    if actor in ("upper", "lower"):
        vote_chamber, vote_location = actor, ""
    else:
        vote_chamber, vote_location = "", actor

    vote = Vote(
        vote_chamber,
        date,
        motion,
        passed,
        yes_count,
        no_count,
        other_count,
        location=vote_location,
        _vote_id=uniqid,
    )
    vote.add_source(url)

    def member_names(group_index):
        # Names are separated by two or more whitespace characters; empty
        # pieces (e.g. from an empty section) are dropped.
        section = match.group(group_index).strip()
        return [name for name in re.split(r"\s{2,}", section) if name]

    for name in member_names(2):
        vote.yes(name)
    for name in member_names(4):
        vote.no(name)
    for name in member_names(7):
        vote.other(name)

    bill.add_vote(vote)
def scrape_vote(self, bill, motion, url):
    """Scrape a roll-call page and add the resulting vote to ``bill``.

    The yes/no/absent/excused totals, the date and the outcome are each read
    from the cell following a label cell. The chamber comes from the
    ``chamber=`` query value in ``url``. ``passed`` is true only when the
    outcome text equals ``PREVAILS``. In the member table, ``Y`` is a yes,
    ``N`` a no, ``X``/``E`` an other vote; rows with any other code are
    skipped.

    Args:
        bill: Bill object that receives the vote via ``add_vote``.
        motion: Motion text for the vote.
        url: Roll-call page URL; also recorded as the source.
    """
    html = self.urlopen(url, retry_on_404=True)
    page = lxml.html.fromstring(html)

    def labelled_int(label):
        # Integer found in the cell(s) after the one labelled ``label``.
        label_cell = page.xpath("//td[text() = '%s']" % label)[0]
        return int(label_cell.xpath("string(following-sibling::td)"))

    yes_count = labelled_int('Yeas (Y):')
    no_count = labelled_int('Nays (N):')
    other_count = labelled_int('Absent (X):') + labelled_int('Excused (E):')

    if 'chamber=House' in url:
        chamber = 'lower'
    elif 'chamber=Senate' in url:
        chamber = 'upper'

    date_cell = page.xpath("//td[text() = 'Date:']")[0]
    raw_date = date_cell.xpath("string(following-sibling::td)")
    try:
        date = datetime.datetime.strptime(raw_date, "%B %d, %Y")
    except ValueError:
        # Second accepted form: abbreviated month followed by a period.
        date = datetime.datetime.strptime(raw_date, "%b. %d, %Y")

    outcome_cell = page.xpath("//td[text()='Outcome:']")[0]
    prevailed = (
        outcome_cell.xpath("string(following-sibling::td)") == 'PREVAILS')

    vote = Vote(chamber, date, motion, prevailed,
                yes_count, no_count, other_count)
    vote.add_source(url)

    member_cell = page.xpath("//td[text() = 'Member']")[0]
    member_rows = member_cell.xpath("../../tr")[1:]  # drop the header row
    for row in member_rows:
        name = row.xpath("string(td[2])")
        code = row.xpath("string(td[4])")
        if code == 'Y':
            vote.yes(name)
        elif code == 'N':
            vote.no(name)
        elif code in ('X', 'E'):
            vote.other(name)

    bill.add_vote(vote)
def scrape_votes(self, bill, sponsor, link):
    """Scrape the vote summaries on ``link`` and add them to ``bill``.

    The text of the ``lblVoteData`` span is split on
    "<bill_id> by <sponsor> - "; each piece after the first describes one
    vote and is further split on a run of non-breaking spaces. The first
    part is the motion (which may contain an M/D/Y date); the remaining
    parts may contain the "Ayes..."/"Noes..." totals and the lists of
    members voting aye or no.

    Args:
        bill: Bill mapping (``bill_id`` and ``chamber`` are read) that
            receives each vote via ``add_vote``.
        sponsor: Sponsor name used to locate the vote headers.
        link: URL of the votes page; also recorded as each vote's source.

    Returns:
        The same ``bill`` object.
    """
    # Compiled once instead of once per vote; raw literals for the escapes.
    date_re = re.compile(r'(\d+/\d+/\d+)')
    count_re = re.compile(r'\d+$')
    aye_re = re.compile(r'^.+voting aye were: (.+) -')
    no_re = re.compile(r'^.+voting no were: (.+) -')

    with self.urlopen(link) as page:
        page = lxml.html.fromstring(page)
        raw_vote_data = page.xpath(
            "//span[@id='lblVoteData']")[0].text_content()
        raw_vote_data = raw_vote_data.strip().split(
            '%s by %s - ' % (bill['bill_id'], sponsor))[1:]

        for raw_vote in raw_vote_data:
            raw_vote = raw_vote.split(
                u'\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0')
            motion = raw_vote[0]

            # vote_date stays None when the motion carries no date.
            vote_date = date_re.search(motion)
            if vote_date:
                vote_date = datetime.datetime.strptime(
                    vote_date.group(), '%m/%d/%Y')

            passed = ('Passed' in motion) or ('Adopted' in raw_vote[1])

            yes_count = None
            no_count = None
            other_count = 0
            ayes = []
            nos = []

            for part in raw_vote[1:]:
                if part.startswith('Ayes...') and count_re.search(part):
                    yes_count = int(count_re.search(part).group())
                elif part.startswith('Noes...') and count_re.search(part):
                    no_count = int(count_re.search(part).group())
                elif aye_re.search(part):
                    ayes = aye_re.search(part).groups()[0].split(', ')
                elif no_re.search(part):
                    nos = no_re.search(part).groups()[0].split(', ')

            # Compare against None, not truthiness: a parsed count of 0
            # (e.g. a unanimous 95-0 vote) is a real total and must not
            # cause both counts to be reset.
            if yes_count is not None and no_count is not None:
                passed = yes_count > no_count
            else:
                yes_count = no_count = 0

            vote = Vote(bill['chamber'], vote_date, motion, passed,
                        yes_count, no_count, other_count)
            vote.add_source(link)
            for member in ayes:
                vote.yes(member)
            for member in nos:
                vote.no(member)

            bill.add_vote(vote)

    return bill
def scrape_vote(self, bill, chamber, url):
    """Scrape one roll-call detail page and add the vote to ``bill``.

    The second row of the first table supplies date, motion, the three
    counts and the pass/fail text. The rows from the fourth onward that have
    exactly two cells are member rows: "Yea..." is a yes, "Nay..." a no,
    "Not Voting..." is not recorded at all, and anything else is recorded
    as other.

    Args:
        bill: Bill object that receives the vote via ``add_vote``.
        chamber: Chamber value passed through to ``Vote``.
        url: Roll-call page URL; also recorded as the source.

    Returns:
        None. Returns early when the page says no details are available.
    """
    page = self.urlopen(url)

    if 'There are no details available for this roll call' in page:
        return

    # Normalise non-breaking spaces explicitly. The original literal was an
    # invisible character in the source, so both spellings are handled here.
    # NOTE(review): assumes ``page`` is text (not bytes) — confirm against
    # the return type of ``self.urlopen``.
    page = page.replace('&nbsp;', ' ').replace(u'\xa0', ' ')
    page = lxml.html.fromstring(page)

    info_row = page.xpath("//table[1]/tr[2]")[0]

    date = info_row.xpath("string(td[1])")
    date = datetime.datetime.strptime(date, "%m/%d/%Y")

    motion = info_row.xpath("string(td[2])")
    yes_count = int(info_row.xpath("string(td[3])"))
    no_count = int(info_row.xpath("string(td[4])"))
    other_count = int(info_row.xpath("string(td[5])"))
    passed = info_row.xpath("string(td[6])") == 'Pass'

    if motion == 'Shall the bill pass?':
        motion_type = 'passage'
    elif motion == 'Shall the bill be read the third time?':
        motion_type = 'reading:3'
    elif 'be amended as' in motion:
        motion_type = 'amendment'
    else:
        motion_type = 'other'

    # The classification above was previously computed but never used (and
    # then overwritten by the member loop's variable); pass it to the vote.
    vote = Vote(chamber, date, motion, passed,
                yes_count, no_count, other_count, type=motion_type)
    vote.add_source(url)

    for tr in page.xpath("//table[1]/tr")[3:]:
        if len(tr.xpath("td")) != 2:
            continue

        # avoid splitting duplicate names
        name = tr.xpath("string(td[1])").strip()
        if not name.startswith(DOUBLED_NAMES):
            name = name.split(' of')[0]

        member_vote = tr.xpath("string(td[2])").strip()
        if member_vote.startswith('Yea'):
            vote.yes(name)
        elif member_vote.startswith('Nay'):
            vote.no(name)
        elif member_vote.startswith('Not Voting'):
            # Deliberately not recorded.
            pass
        else:
            vote.other(name)

    bill.add_vote(vote)
def scrape_vote(self, bill, vote_id):
    """Fetch one roll call as JSON and attach the vote to ``bill``.

    A form containing ``vote_id`` is posted to the roll-call endpoint. When
    the response has a body, the ``Model`` entry is turned into a ``Vote``:
    not-voting, vacant, absent and conflict counts are summed as "other",
    and each member row is recorded as yes (``Y``), no (``N``) or other.
    Member short names are mapped to display names through
    ``self.legislators_by_short``; unknown short names are logged and used
    as-is.

    Args:
        bill: Bill object that receives the vote via ``add_vote``.
        vote_id: Roll-call identifier sent as ``rollCallId``.
    """
    vote_url = 'https://legis.delaware.gov/json/RollCall/GetRollCallVoteByRollCallId'
    form = {'rollCallId': vote_id, 'sort': '', 'group': '', 'filter': ''}

    response = self.post(url=vote_url, data=form, allow_redirects=True)
    if not response.content:
        return

    roll = json.loads(response.content)['Model']
    vote_chamber = self.chamber_map[roll['ChamberName']]

    # Timestamps look like "7/1/16 01:00 AM".
    vote_date = datetime.strptime(roll['TakenAtDateTime'],
                                  '%m/%d/%y %I:%M %p')

    # TODO: What does this code mean?
    vote_motion = roll['RollCallVoteType']
    vote_passed = roll['RollCallStatus'] == 'Passed'

    other_keys = ('NotVotingCount', 'VacantVoteCount',
                  'AbsentVoteCount', 'ConflictVoteCount')
    other_count = sum(int(roll[key]) for key in other_keys)

    vote = Vote(chamber=vote_chamber,
                date=vote_date,
                motion=vote_motion,
                passed=vote_passed,
                yes_count=roll['YesVoteCount'],
                no_count=roll['NoVoteCount'],
                other_count=other_count)

    recorders = {'Y': vote.yes, 'N': vote.no}
    for row in roll['AssemblyMemberVotes']:
        # AssemblyMemberId looks like it should work here,
        # but for some sessions it's bugged to only return session
        try:
            name = self.legislators_by_short[
                str(row['ShortName'])]['DisplayName']
        except KeyError:
            self.warning('could not find legislator short name %s',
                         row['ShortName'])
            name = row['ShortName']

        recorders.get(row['SelectVoteTypeCode'], vote.other)(name)

    bill.add_vote(vote)
def scrape_votes(self, bill, page):
    """Add one upper-chamber "Floor Vote" per floor-vote heading on ``page``.

    A heading is a ``<b>`` starting with "VOTE: FLOOR VOTE:"; the date
    follows the first hyphen in its text. Inside the sibling blockquotes,
    ``<b>`` tags name a section ("Ayes", "Nays", or
    "Excused"/"Abstain"/"Absent") with its count in parentheses, and ``<a>``
    tags are the members belonging to the most recent section. A vote passes
    only when the ayes exceed nays plus others.

    Raises:
        ValueError: If a section heading is not one of the known kinds.
    """
    count_re = re.compile(r'\((\d+)\):')

    for b in page.xpath("//div/b[starts-with(., 'VOTE: FLOOR VOTE:')]"):
        date_text = b.text.split('-')[1].strip()
        date = datetime.datetime.strptime(date_text, "%b %d, %Y").date()

        members = {'yes': [], 'no': [], 'other': []}
        counts = {'yes': 0, 'no': 0, 'other': 0}
        vtype = None

        for tag in b.xpath("following-sibling::blockquote/*"):
            if tag.tag == 'b':
                text = tag.text
                if text.startswith('Ayes'):
                    vtype = 'yes'
                elif text.startswith('Nays'):
                    vtype = 'no'
                elif text.startswith(('Excused', 'Abstain', 'Absent')):
                    vtype = 'other'
                else:
                    raise ValueError('bad vote type: %s' % tag.text)

                section_count = int(count_re.search(text).group(1))
                if vtype == 'other':
                    # Several "other" sections may appear; add them up.
                    counts['other'] += section_count
                else:
                    counts[vtype] = section_count
            elif tag.tag == 'a':
                name = tag.text.strip()
                # Names seen before any section heading are dropped.
                if vtype in members:
                    members[vtype].append(name)

        yes_count = counts['yes']
        no_count = counts['no']
        other_count = counts['other']
        passed = yes_count > (no_count + other_count)

        vote = Vote('upper', date, 'Floor Vote', passed, yes_count,
                    no_count, other_count)

        for name in members['yes']:
            vote.yes(name)
        for name in members['no']:
            vote.no(name)
        for name in members['other']:
            vote.other(name)

        bill.add_vote(vote)
def _parse_senate_votes(self, vote_data):
    """Convert one vote record into an upper-chamber ``Vote``.

    Args:
        vote_data: Mapping providing ``voteDate`` (``YYYY-MM-DD``),
            ``voteType``, ``memberVotes`` and, for committee votes,
            ``committee``.

    Returns:
        The ``Vote`` with its motion set to "Floor Vote" or
        "<committee name> Vote". Members under ``AYE``/``AYEWR`` are yes
        votes, ``NAY`` are no votes and ``EXC``/``ABS``/``ABD`` are other
        votes; each recorded member bumps the matching count. ``passed`` is
        true when yes votes outnumber no votes.

    Raises:
        ValueError: If ``voteType`` is neither ``FLOOR`` nor ``COMMITTEE``.
    """
    parsed = datetime.datetime.strptime(vote_data['voteDate'], '%Y-%m-%d')

    vote = Vote(
        chamber='upper',
        date=parsed.date(),
        motion='[No motion available.]',
        passed=False,
        yes_votes=[],
        no_votes=[],
        other_votes=[],
        yes_count=0,
        no_count=0,
        other_count=0)

    kind = vote_data['voteType']
    if kind not in ('FLOOR', 'COMMITTEE'):
        raise ValueError('Unknown vote type encountered.')
    if kind == 'FLOOR':
        vote['motion'] = 'Floor Vote'
    else:
        vote['motion'] = '{} Vote'.format(vote_data['committee']['name'])

    vote_rolls = vote_data['memberVotes']['items']

    def full_names(code):
        return [member['fullName'] for member in vote_rolls[code]['items']]

    # Count all yea votes (plain ayes and the AYEWR variant).
    for code in ('AYE', 'AYEWR'):
        if 'items' in vote_rolls.get(code, {}):
            for name in full_names(code):
                vote.yes(name)
                vote['yes_count'] += 1

    # Count all nay votes.
    if 'items' in vote_rolls.get('NAY', {}):
        for name in full_names('NAY'):
            vote.no(name)
            vote['no_count'] += 1

    # Count all other types of votes.
    for code in ('EXC', 'ABS', 'ABD'):
        if vote_rolls.get(code, []):
            for name in full_names(code):
                vote.other(name)
                vote['other_count'] += 1

    vote['passed'] = vote['yes_count'] > vote['no_count']
    return vote
def scrape_votes(self, bill_page, bill, insert, year):
    """Scrape the passage votes linked from a bill page.

    Every link whose text contains "Passage" is followed. The chamber is
    ``lower`` when the link text mentions "Assembly" and ``upper``
    otherwise. The vote page's last ``<h1>`` text (or the one before it,
    when the last is blank) holds the timestamp, the first centred div
    holds the totals, and the rows of the second table hold the members.

    Args:
        bill_page: HTML of the bill page.
        bill: Bill object; receives each vote URL as a source and each vote.
        insert: Session path segment used to build the vote URL.
        year: Unused by this method.
    """
    bill_root = lxml.html.fromstring(bill_page)

    def count_of(label, text):
        # First "<number> <label>" occurrence in the totals block.
        return int(re.findall(r"(\d+) " + label, text)[0])

    for link in bill_root.xpath('//a[contains(text(), "Passage")]'):
        motion = link.text
        chamber = 'lower' if 'Assembly' in motion else 'upper'

        vote_url = 'http://www.leg.state.nv.us/Session/%s/Reports/%s' % (
            insert, link.get('href'))
        bill.add_source(vote_url)

        page = self.urlopen(vote_url)
        page = page.replace(u"\xa0", " ")
        root = lxml.html.fromstring(page)

        date = root.xpath('//h1/text()')[-1].strip()
        if not date:
            date = root.xpath('//h1/text()')[-2].strip()

        # %p only influences the parsed hour when paired with %I; with %H a
        # "PM" time was silently parsed as its AM counterpart.
        try:
            date = datetime.strptime(date, "%B %d, %Y at %I:%M %p")
        except ValueError:
            # Hours outside 1-12 are still accepted the way they used to be.
            date = datetime.strptime(date, "%B %d, %Y at %H:%M %p")

        top_block_text = root.xpath(
            '//div[@align="center"]')[0].text_content()
        yes_count = count_of("Yea", top_block_text)
        no_count = count_of("Nay", top_block_text)
        excused = count_of("Excused", top_block_text)
        not_voting = count_of("Not Voting", top_block_text)
        absent = count_of("Absent", top_block_text)

        other_count = excused + not_voting + absent
        passed = yes_count > no_count

        vote = Vote(chamber, date, motion, passed, yes_count, no_count,
                    other_count, not_voting=not_voting, absent=absent)

        for el in root.xpath('//table[2]/tr'):
            tds = el.xpath('td')
            name = tds[1].text_content().strip()
            vote_result = tds[2].text_content().strip()

            if vote_result == 'Yea':
                vote.yes(name)
            elif vote_result == 'Nay':
                vote.no(name)
            else:
                vote.other(name)

        bill.add_vote(vote)
def scrape_votes(self, bill, page):
    """Create an upper-chamber "Floor Vote" for each floor-vote heading.

    Headings are ``<b>`` elements starting with "VOTE: FLOOR VOTE:", with
    the date after the first hyphen. Within the sibling blockquotes a
    ``<b>`` tag opens a section ("Ayes", "Nays", or
    "Excused"/"Abstains"/"Absent") and gives its count in parentheses; each
    ``<a>`` tag is a member of the section opened last. The vote passes only
    when the ayes exceed the nays and others combined.

    Raises:
        ValueError: If a section heading is not one of the known kinds.
    """
    other_headings = ('Excused', 'Abstains', 'Absent')

    def parenthesised_count(text):
        return int(re.search(r'\((\d+)\):', text).group(1))

    for b in page.xpath("//div/b[starts-with(., 'VOTE: FLOOR VOTE:')]"):
        date = datetime.datetime.strptime(
            b.text.split('-')[1].strip(), "%b %d, %Y").date()

        yes_votes, no_votes, other_votes = [], [], []
        yes_count = no_count = other_count = 0
        bucket = None  # list that receives the next member names

        for tag in b.xpath("following-sibling::blockquote/*"):
            if tag.tag == 'b':
                heading = tag.text
                if heading.startswith('Ayes'):
                    bucket = yes_votes
                    yes_count = parenthesised_count(heading)
                elif heading.startswith('Nays'):
                    bucket = no_votes
                    no_count = parenthesised_count(heading)
                elif heading.startswith(other_headings):
                    bucket = other_votes
                    # Several "other" sections can occur; accumulate them.
                    other_count += parenthesised_count(heading)
                else:
                    raise ValueError('bad vote type: %s' % tag.text)
            elif tag.tag == 'a':
                name = tag.text.strip()
                # Members listed before any heading are ignored.
                if bucket is not None:
                    bucket.append(name)

        passed = yes_count > (no_count + other_count)
        vote = Vote('upper', date, 'Floor Vote', passed, yes_count,
                    no_count, other_count)

        for name in yes_votes:
            vote.yes(name)
        for name in no_votes:
            vote.no(name)
        for name in other_votes:
            vote.other(name)

        bill.add_vote(vote)
def parse_vote(self, bill, actor, date, motion, url, uniqid):
    """Read a text roll call from ``url`` and add the vote to ``bill``.

    The totals and member lists are extracted with a single pattern of the
    form "YEAS n <names> NAYS n <names> ABSENT [OR NOT VOTING] n <names>".
    Names are separated by two or more whitespace characters.

    Args:
        bill: Bill object; ``url`` is added as a source and the vote is
            attached with ``add_vote``.
        actor: Used as the vote chamber when it is ``'upper'`` or
            ``'lower'``; otherwise stored as the vote's ``location``.
        date: Date value passed through to ``Vote``.
        motion: Motion text for the vote.
        url: Roll-call URL; also recorded as the vote's source.
        uniqid: Stored on the vote as ``_vote_id``.

    Raises:
        AttributeError: If the page text does not match the pattern (the
            search returns ``None``).
    """
    page = self.get(url).text
    bill.add_source(url)

    # Raw literals so "\s" and "\d" reach the regex engine unchanged without
    # relying on unknown string escapes being passed through.
    vote_re = re.compile(
        r'YEAS -?\s?(\d+)(.*)NAYS -?\s?(\d+)'
        r'(.*)ABSENT( OR NOT VOTING)? -?\s?'
        r'(\d+)(.*)',
        re.MULTILINE | re.DOTALL)
    name_gap = re.compile(r'\s{2,}')

    match = vote_re.search(page)
    yes_count = int(match.group(1))
    no_count = int(match.group(3))
    other_count = int(match.group(6))
    passed = yes_count > no_count

    is_chamber = actor == 'upper' or actor == 'lower'
    vote_chamber = actor if is_chamber else ''
    vote_location = '' if is_chamber else actor

    vote = Vote(vote_chamber, date, motion, passed,
                yes_count, no_count, other_count,
                location=vote_location, _vote_id=uniqid)
    vote.add_source(url)

    # (regex group holding the names, recorder for that group)
    sections = ((2, vote.yes), (4, vote.no), (7, vote.other))
    for group_index, record in sections:
        for name in name_gap.split(match.group(group_index).strip()):
            # Splitting an empty section yields an empty string; skip it.
            if name:
                record(name)

    bill.add_vote(vote)
def process_vote(self, data):
    """Convert one vote record into a ``Vote`` and save it.

    Args:
        data: Mapping describing the vote. The keys read here are
            ``organization``, ``bill``, ``counts``, ``start_date``,
            ``motion_text``, ``result``, ``bill_action``,
            ``legislative_session``, ``votes``, ``sources`` and ``extras``.

    The chamber comes from the organization's ``classification`` and the
    bill chamber from ``self.get_bill_details``; ``legislature`` is mapped
    to ``upper`` in both cases. Count options other than ``yes``/``no`` are
    summed into ``other_count``; ``yes_count``/``no_count`` stay ``None``
    when no such count entry exists. The contents of ``extras`` are merged
    into the vote before saving.
    """
    def as_chamber(value):
        # 'legislature' is treated as the upper chamber.
        return 'upper' if value == 'legislature' else value

    chamber = as_chamber(
        parse_psuedo_id(data['organization'])['classification'])
    bill_chamber, bill_id = self.get_bill_details(data['bill'])
    bill_chamber = as_chamber(bill_chamber)

    yes_count = None
    no_count = None
    other_count = 0
    for count in data['counts']:
        option = count['option']
        if option == 'yes':
            yes_count = count['value']
        elif option == 'no':
            no_count = count['value']
        else:
            other_count += count['value']

    vote = Vote(
        chamber=chamber,
        date=parse_date(data['start_date']),
        motion=data['motion_text'],
        passed=data['result'] == 'pass',
        yes_count=yes_count,
        no_count=no_count,
        other_count=other_count,
        action=data['bill_action'],
        # TODO: was data['motion_classification'],
        type='other',
        session=data['legislative_session'],
        bill_chamber=bill_chamber,
        bill_id=bill_id,
    )

    recorders = {'yes': vote.yes, 'no': vote.no}
    for cast in data['votes']:
        recorders.get(cast['option'], vote.other)(cast['voter_name'])

    for source in data['sources']:
        vote.add_source(source['url'])

    vote.update(**data['extras'])
    self.save_vote(vote)
def scrape_votes(self, bill_page, bill, insert, year):
    """Scrape the passage votes linked from a bill page.

    Each link whose text contains "Passage" leads to a vote page. The
    chamber is ``lower`` when the link text mentions "Assembly", otherwise
    ``upper``. On the vote page the last ``<h1>`` text is the timestamp, the
    first centred div lists the totals, and the rows of the second table
    list each member's vote.

    Args:
        bill_page: HTML of the bill page.
        bill: Bill object; receives each vote URL as a source and each vote.
        insert: Session path segment used to build the vote URL.
        year: Unused by this method.
    """
    bill_root = lxml.html.fromstring(bill_page)

    for link in bill_root.xpath('//a[contains(text(), "Passage")]'):
        motion = link.text
        if "Assembly" in motion:
            chamber = "lower"
        else:
            chamber = "upper"

        vote_url = "http://www.leg.state.nv.us/Session/%s/Reports/%s" % (
            insert, link.get("href"))
        bill.add_source(vote_url)

        with self.urlopen(vote_url) as page:
            page = page.decode("utf8").replace(u"\xa0", " ")
            vote_root = lxml.html.fromstring(page)

            date = vote_root.xpath("//h1/text()")[-1].strip()
            # The 12-hour directive %I is required for %p to take effect;
            # the old "%H ... %p" format parsed every PM time as AM.
            try:
                date = datetime.strptime(date, "%B %d, %Y at %I:%M %p")
            except ValueError:
                # Keep accepting hour values that only %H can parse.
                date = datetime.strptime(date, "%B %d, %Y at %H:%M %p")

            top_block_text = vote_root.xpath(
                '//div[@align="center"]')[0].text_content()
            yes_count = int(re.findall(r"(\d+) Yea", top_block_text)[0])
            no_count = int(re.findall(r"(\d+) Nay", top_block_text)[0])
            excused = int(re.findall(r"(\d+) Excused", top_block_text)[0])
            not_voting = int(
                re.findall(r"(\d+) Not Voting", top_block_text)[0])
            absent = int(re.findall(r"(\d+) Absent", top_block_text)[0])

            other_count = excused + not_voting + absent
            passed = yes_count > no_count

            vote = Vote(
                chamber,
                date,
                motion,
                passed,
                yes_count,
                no_count,
                other_count,
                not_voting=not_voting,
                absent=absent,
            )

            for row in vote_root.xpath("//table[2]/tr"):
                tds = row.xpath("td")
                name = tds[1].text_content().strip()
                vote_result = tds[2].text_content().strip()

                if vote_result == "Yea":
                    vote.yes(name)
                elif vote_result == "Nay":
                    vote.no(name)
                else:
                    vote.other(name)

            bill.add_vote(vote)
def scrape_vote(self, bill, vote_type_id, vote_type):
    """Scrape one vote page for ``bill`` and attach it as an 'upper' vote.

    The page URL is built from ``vote_type_id`` and ``bill['bill_id']``;
    ``vote_type`` is used as the vote's motion text.  Counts come from the
    ``VoteCount1`` (yes), ``VoteCount2`` (no) and ``VoteCount3``..
    ``VoteCount8`` (summed as "other") spans.  The voice-vote and approved
    flags are true when the corresponding span holds an ``'x'``.

    Raises ``IndexError`` if an expected span, or a member's vote text, is
    missing from the page.
    """
    base_url = 'http://dcclims1.dccouncil.us/lims/voting.aspx?VoteTypeID=%s&LegID=%s'
    url = base_url % (vote_type_id, bill['bill_id'])

    with self.urlopen(url) as html:
        doc = lxml.html.fromstring(html)

        def span_text(span_id):
            # The values read below are each the first <b> text inside a
            # <span> with the given id.
            return doc.xpath('//span[@id="%s"]/b/text()' % span_id)[0]

        vote_date = convert_date(doc.get_element_by_id('VoteDate').text)

        # check if voice vote / approved boxes have an 'x'
        voice = span_text('VoteTypeVoice') == 'x'
        passed = span_text('VoteResultApproved') == 'x'

        yes_count = extract_int(span_text('VoteCount1'))
        no_count = extract_int(span_text('VoteCount2'))

        # ``range`` rather than the Python-2-only ``xrange``: six values are
        # cheap either way, and this keeps the method importable on Python 3.
        other_count = sum(
            extract_int(span_text('VoteCount%s' % n)) for n in range(3, 9))

        vote = Vote('upper', vote_date, vote_type, passed, yes_count,
                    no_count, other_count, voice_vote=voice)
        vote.add_source(url)

        # members are only text on page in a <u> tag
        for member_u in doc.xpath('//u'):
            member = member_u.text
            # normalize case
            vote_text = member_u.xpath('../../i/text()')[0].upper()
            # NOTE(review): these are substring tests, so a label such as
            # "NOT VOTING" would be recorded as a "no" - confirm against the
            # labels the page actually uses.
            if 'YES' in vote_text:
                vote.yes(member)
            elif 'NO' in vote_text:
                vote.no(member)
            else:
                vote.other(member)

        bill.add_vote(vote)
def parse_vote(self, bill, action, chamber, date):
    """Extract a roll call embedded in an action string and add it to ``bill``.

    Nothing happens unless ``action`` contains ``'as follows'``.  When it
    does, the text is expected to match the "were as follows: N Aye(s): ...;
    Aye(s) with reservations: ...; N No(es): ...; and N Excused: ..." layout;
    a non-matching action raises ``AttributeError`` because the failed
    ``re.search`` result is used directly.

    The motion is the action text up to and including its first period, and
    the vote is marked passed when ``'PASSED'`` appears in ``action``.
    Ayes with reservations are recorded as yes votes; excused members are
    recorded as other votes.
    """
    pattern = (
        r"were as follows: (?P<n_yes>\d+) Aye\(?s\)?:\s+(?P<yes>.*?);"
        r"\s+Aye\(?s\)? with reservations:\s+(?P<yes_resv>.*?);"
        r"\s+(?P<n_no>\d*) No\(?es\)?:\s+(?P<no>.*?);"
        r"\s+and (?P<n_excused>\d*) Excused: (?P<excused>.*)"
    )

    if 'as follows' not in action:
        return

    result = re.search(pattern, action).groupdict()
    motion = action.split('.')[0] + '.'

    # Count groups may be empty strings (the pattern allows ``\d*``), hence
    # the ``or 0`` fallback.
    yes_total = int(result['n_yes'] or 0)
    no_total = int(result['n_no'] or 0)
    excused_total = int(result['n_excused'] or 0)

    vote = Vote(chamber, date, motion, 'PASSED' in action,
                yes_total, no_total, excused_total)

    # (regex group, recorder) pairs, in the order the names are recorded.
    recorders = (
        ('yes', vote.yes),
        ('yes_resv', vote.yes),
        ('no', vote.no),
        ('excused', vote.other),
    )
    for group, record in recorders:
        for voter in split_specific_votes(result[group]):
            record(voter)

    bill.add_vote(vote)
def parse_vote(self, actor, date, row):
    """Build and return a ``Vote`` from a roll-call ``row`` element.

    ``row.text`` is the motion.  The first ``<span>`` under the row holds
    four '-'-separated fields: the outcome text followed by the yes, no and
    other counts.  The text trailing the second and third spans lists the
    yes and no voters; the text trailing the fourth span lists the other
    voters, but only when that span's text starts with ``'Absent'``.

    The outcome becomes ``True`` when it contains "adopted" or "passed" and
    ``False`` when it contains "failed" (case-insensitive); if none of these
    appear, the raw outcome text is passed through unchanged.  Names are
    split on commas without stripping whitespace, and empty entries and the
    literal ``'None'`` are skipped.
    """
    separator = u'\xa0--\xa0'

    def names_after(span):
        # Voter names are the comma-separated text following the span, with
        # the NBSP-dash-dash-NBSP separator removed and empty pieces dropped.
        pieces = span.tail.replace(separator, '').split(',')
        return [piece for piece in pieces if piece]

    spans = row.xpath('.//span')
    motion = row.text

    summary = spans[0].text_content()
    passed, yes_count, no_count, other_count = summary.split('-')

    yes_votes = names_after(spans[1])
    no_votes = names_after(spans[2])
    if spans[3].text.startswith('Absent'):
        other_votes = names_after(spans[3])
    else:
        other_votes = []

    # Translate the outcome text into a boolean where a keyword is present.
    outcome = passed.lower()
    for keyword, verdict in (('adopted', True),
                             ('passed', True),
                             ('failed', False)):
        if keyword in outcome:
            passed = verdict
            break

    vote = Vote(actor, date, motion, passed,
                int(yes_count), int(no_count), int(other_count))

    ballots = (
        (vote.yes, yes_votes),
        (vote.no, no_votes),
        (vote.other, other_votes),
    )
    for record, names in ballots:
        for name in names:
            if name != 'None':
                record(name)

    return vote