def scrape_senate_vote(self, bill, url, date):
    """Scrape a Senate roll-call PDF and yield one VoteEvent for ``bill``.

    The PDF at ``url`` is downloaded and converted to text.  When the text
    contains a ``Yea: n  Nay: n  Absent: n`` summary line the work is
    delegated to ``scrape_senate_vote_3col``.  Otherwise the names that
    follow each ``Yea(s):`` / ``Nay(s):`` / ``Absent(s):`` label are
    recorded, and the result is recomputed from the tallies.

    If the file cannot be retrieved, a warning is logged and nothing is
    yielded.
    """
    try:
        filename, resp = self.urlretrieve(url)
    except scrapelib.HTTPError:
        self.warning("missing vote file %s" % url)
        return

    vote = VoteEvent(
        chamber="upper",
        start_date=date.strftime("%Y-%m-%d"),
        motion_text="Passage",
        # setting 'fail' for now.
        result="fail",
        classification="passage",
        bill=bill,
    )
    vote.add_source(url)
    vote.pupa_id = url

    # Always delete the downloaded file, even if the conversion fails.
    try:
        text = convert_pdf(filename, "text").decode("utf-8")
    finally:
        os.remove(filename)

    if re.search(r"Yea:\s+\d+\s+Nay:\s+\d+\s+Absent:\s+\d+", text):
        yield from self.scrape_senate_vote_3col(bill, vote, text, url, date)
        return

    # With one capture group, re.split returns
    # [preamble, label, names, label, names, ...].  The preamble is not a
    # vote section, so pair each label with the text that follows it.
    # (The previous filter()/pop() approach fails on Python 3, where
    # filter() returns an iterator with no pop().)
    parts = re.split(r"(Yea|Nay|Absent)s?:", text)
    keymap = dict(yea="yes", nay="no")
    actual_vote = collections.defaultdict(int)
    vote_count = {"yes": 0, "no": 0, "other": 0}

    for vote_val, values in zip(parts[1::2], parts[2::2]):
        key = keymap.get(vote_val.lower(), "other")
        for name in re.split(r"(?:[\s,]+and\s|[\s,]{2,})", values):
            if name.lower().strip() == "none.":
                continue
            name = name.replace("..", "")
            name = re.sub(r"\.$", "", name)
            # Drop tallies, dashes and padding surrounding the name.
            name = name.strip("-1234567890 \n")
            if not name:
                continue
            vote.vote(key, name)
            actual_vote[vote_val] += 1
            vote_count[key] += 1
        # Sanity check: per-label tally must agree with per-option tally.
        assert actual_vote[vote_val] == vote_count[key]

    for key, value in vote_count.items():
        vote.set_count(key, value)

    # updating result with actual value
    vote.result = (
        "pass"
        if vote_count["yes"] > (vote_count["no"] + vote_count["other"])
        else "fail"
    )

    yield vote
def get_vote_event(self, bill, act, votes, result):
    """Build a VoteEvent for ``bill`` from one action and its roll call.

    ``act`` supplies the description, date and the JSON-encoded organization
    id (prefixed with ``~``).  ``votes`` is an iterable of mappings with
    ``VoteValueName`` and ``VotePersonName`` keys.  Votes whose value is
    ``suspended`` are skipped.  The bill is expected to carry exactly two
    sources: the web URL followed by the API URL.
    """
    vote_event = VoteEvent(
        legislative_session=bill.legislative_session,
        motion_text=act['description'],
        organization=json.loads(act['organization_id'].lstrip('~')),
        classification=None,
        start_date=act['date'],
        result=result,
        bill=bill,
    )

    web_url, api_url = (source['url'] for source in bill.sources)
    vote_event.add_source(web_url)
    vote_event.add_source(api_url + '/histories')

    for ballot in votes:
        option = ballot['VoteValueName'].lower()
        if option != 'suspended':
            # Fall back to the raw option when no mapping is defined.
            option = self.VOTE_OPTIONS.get(option, option)
            vote_event.vote(option, ballot['VotePersonName'].strip())

    return vote_event
def parse_vote_page(self, vote_url, bill):
    """Fetch ``vote_url`` and return a VoteEvent describing that roll call.

    Chamber is inferred from the URL, the date from the "Legislative" cell,
    the motion from the ``colspan="23"`` cells, and the totals from the cells
    sharing the CSS class of the "Yeas" cell.  Voter names are the text
    nodes following each "Voting Yea" / "Voting Nay" / "Not Voting" /
    "Excused" heading.
    """
    doc = lxml.html.fromstring(self.get(vote_url).text)

    chamber = "upper" if "senate" in vote_url else "lower"

    # date in the following format: Mar 23, 2009
    raw_date = doc.xpath('//td[starts-with(text(), "Legislative")]')[0].text
    raw_date = raw_date.replace(u"\xa0", " ")
    date = datetime.datetime.strptime(raw_date[18:], "%b %d, %Y")

    motion = "".join(cell.text_content() for cell in doc.xpath('//td[@colspan="23"]'))
    if not motion:
        motion = "No motion given"  # XXX: Double check this. See SJ 3.
    motion = motion.replace(u"\xa0", " ")

    # totals: skip the first matching cell, then read the trailing number
    # of the next five cells (yes, no, and three "other" categories).
    tot_class = doc.xpath('//td[contains(text(), "Yeas")]')[0].get("class")
    totals = doc.xpath('//td[@class="%s"]/text()' % tot_class)[1:]
    tallies = [int(totals[position].split()[-1]) for position in range(5)]
    yes_count, no_count = tallies[0], tallies[1]
    other_count = sum(tallies[2:])

    vote = VoteEvent(
        bill=bill,
        chamber=chamber,
        start_date=date.strftime("%Y-%m-%d"),
        motion_text=motion,
        classification="passage",
        result="pass" if yes_count > no_count else "fail",
    )
    vote.pupa_id = vote_url  # contains sequence number
    vote.set_count("yes", yes_count)
    vote.set_count("no", no_count)
    vote.set_count("other", other_count)

    # Section headings select the recording method; the text nodes after a
    # heading are voter names.  The method is looked up only when its
    # heading is actually seen.
    headings = (
        ("Voting Yea", "yes"),
        ("Voting Nay", "no"),
        ("Not Voting", "other"),
        ("Excused", "other"),
    )
    record = None
    for raw in doc.xpath("//td/text()"):
        cell = raw.replace(u"\xa0", " ")
        method_name = next(
            (method for prefix, method in headings if cell.startswith(prefix)),
            None,
        )
        if method_name is not None:
            record = getattr(vote, method_name)
        elif record:
            record(cell.rstrip("*"))

    return vote
def add_vote(self, bill, chamber, date, text, url):
    """Create a VoteEvent from an action ``text`` carrying Ayes/Noes tallies.

    The first "Ayes n, Noes n" match in ``text`` gives the counts, and the
    first matching pattern in ``motion_classifiers`` gives the
    classification (default ``'other'``).  When ``url`` is given it is added
    as a source and the individual votes are fetched from it.
    """
    tallies = re.findall(r'Ayes,?[\s]?(\d+)[,;]\s+N(?:oes|ays),?[\s]?(\d+)', text)
    ayes, noes = (int(number) for number in tallies[0])

    classification = next(
        (kind for pattern, kind in motion_classifiers.items()
         if re.match(pattern, text)),
        'other',
    )

    event = VoteEvent(
        chamber=chamber,
        start_date=TIMEZONE.localize(date),
        motion_text=text,
        result='pass' if ayes > noes else 'fail',
        classification=classification,
        bill=bill,
    )
    event.set_count('yes', ayes)
    event.set_count('no', noes)

    if not url:
        return event

    # fetch the vote itself
    event.add_source(url)
    if 'av' in url:
        self.add_house_votes(event, url)
    elif 'sv' in url:
        self.add_senate_votes(event, url)
    return event
def build_vote(session, bill_id, url, vote_record, chamber, motion_text):
    """Return a VoteEvent assembled from an already-parsed ``vote_record``.

    ``vote_record`` maps ``"yes"``, ``"no"``, ``"excused"``, ``"absent"``
    and ``"other"`` to collections of voter names, and ``"date"`` to an
    object with ``strftime``.  The vote passes when yes votes outnumber no
    votes.
    """
    options = ("yes", "no", "excused", "absent", "other")

    # When they vote in a substitute they mark it as XHB
    bill_id = bill_id.replace("XHB", "HB")

    outcome = "pass" if len(vote_record["yes"]) > len(vote_record["no"]) else "fail"
    bill_chamber = "lower" if bill_id[0] != "S" else "upper"

    vote_event = VoteEvent(
        result=outcome,
        chamber=chamber,
        start_date=vote_record["date"].strftime("%Y-%m-%d"),
        motion_text=motion_text,
        classification="passage",
        legislative_session=session,
        bill=bill_id,
        bill_chamber=bill_chamber,
    )
    vote_event.pupa_id = url

    for option in options:
        vote_event.set_count(option, len(vote_record[option]))

    for option in options:
        for voter in vote_record[option]:
            vote_event.vote(option, voter)

    vote_event.add_source(url)
    return vote_event
def scrape_vote(self, bill, motion, url):
    """Scrape the roll-call page at ``url`` and yield a VoteEvent.

    Counts, date and outcome are read from the cell following each labelled
    cell.  Absent and excused votes are combined into ``other``.  The vote
    passes only when the outcome cell reads ``PREVAILS``.
    """
    page = lxml.html.fromstring(self.get(url, retry_on_404=True).text)

    def value_after(label_xpath):
        # Text of the cell that follows the labelled cell.
        label_cell = page.xpath(label_xpath)[0]
        return label_cell.xpath("string(following-sibling::td)")

    yes_count = int(value_after("//td[text() = 'Yeas (Y):']"))
    no_count = int(value_after("//td[text() = 'Nays (N):']"))
    abs_count = int(value_after("//td[text() = 'Absent (X):']"))
    ex_count = int(value_after("//td[text() = 'Excused (E):']"))
    other_count = abs_count + ex_count

    # The chamber comes from the query string of the URL.
    if 'chamber=House' in url:
        chamber = 'lower'
    elif 'chamber=Senate' in url:
        chamber = 'upper'

    date = value_after("//td[text() = 'Date:']")
    try:
        date = datetime.datetime.strptime(date, "%B %d, %Y")
    except ValueError:
        # Fall back to the abbreviated month format, e.g. "Jan. 5, 2017".
        date = datetime.datetime.strptime(date, "%b. %d, %Y")

    outcome = value_after("//td[text()='Outcome:']")

    vote = VoteEvent(
        chamber=chamber,
        start_date=date.strftime('%Y-%m-%d'),
        motion_text=motion,
        result='pass' if outcome == 'PREVAILS' else 'fail',
        classification='passage',
        bill=bill,
    )
    vote.set_count('yes', yes_count)
    vote.set_count('no', no_count)
    vote.set_count('other', other_count)
    vote.add_source(url)

    # Rows after the header row: name in column 2, vote code in column 4.
    option_for_code = {'Y': 'yes', 'N': 'no', 'X': 'other', 'E': 'other'}
    member_cell = page.xpath("//td[text() = 'Member']")[0]
    for row in member_cell.xpath("../../tr")[1:]:
        name = row.xpath("string(td[2])")
        code = row.xpath("string(td[4])")
        if code in option_for_code:
            vote.vote(option_for_code[code], name)

    yield vote
def add_vote(self, bill, chamber, date, text, url):
    """Create a VoteEvent from an action ``text`` carrying Ayes/Noes tallies.

    The first "Ayes n, Noes n" match in ``text`` gives the counts, and the
    first matching pattern in ``motion_classifiers`` gives the
    classification (default ``"other"``).

    When ``url`` is given, its last path segment becomes the ``pupa_id``,
    the URL is added as a source, and the individual votes are fetched.
    A falsy ``url`` is allowed: the event is returned with counts only.
    """
    votes = re.findall(r"Ayes,?[\s]?(\d+)[,;]\s+N(?:oes|ays),?[\s]?(\d+)", text)
    yes, no = int(votes[0][0]), int(votes[0][1])

    vtype = "other"
    for regex, classification in motion_classifiers.items():
        if re.match(regex, text):
            vtype = classification
            break

    v = VoteEvent(
        chamber=chamber,
        start_date=TIMEZONE.localize(date),
        motion_text=text,
        result="pass" if yes > no else "fail",
        classification=vtype,
        bill=bill,
    )
    v.set_count("yes", yes)
    v.set_count("no", no)

    if url:
        # Derive the id only when a URL exists; doing it before this guard
        # crashed on a missing URL.
        v.pupa_id = url.split("/")[-1]

        # fetch the vote itself
        v.add_source(url)
        if "av" in url:
            self.add_house_votes(v, url)
        elif "sv" in url:
            self.add_senate_votes(v, url)

    return v
def parse_bill_actions_table(self, bill, action_table, bill_id, session, url, bill_chamber):
    """Add every action row to ``bill`` and yield a VoteEvent for vote rows.

    The first child of ``action_table`` is skipped as a header.  Each
    remaining row provides a date, an actor code and a description.
    Committee short codes returned by ``categorize_action`` are translated
    through ``self.short_ids``; unknown codes are ignored.  When
    ``self.parse_vote`` recognises a vote in the description, a VoteEvent is
    yielded.
    """
    actor_names = {
        "S": "upper",
        "H": "lower",
        "D": "Data Systems",
        "$": "Appropriation measure",
        "ConAm": "Constitutional Amendment"
    }

    for row in action_table.xpath('*')[1:]:
        date = dt.datetime.strptime(
            row[0].text_content(), "%m/%d/%Y").strftime('%Y-%m-%d')
        actor_code = row[1].text_content()
        description = row[2].text_content()
        actor = actor_names[actor_code]

        act_type, committees = categorize_action(description)

        # XXX: Translate short-code to full committee name for the
        #      matcher.
        real_committees = []
        for short_code in committees or ():
            try:
                real_committees.append(self.short_ids[short_code]['name'])
            except KeyError:
                pass

        act = bill.add_action(description, date, chamber=actor,
                              classification=act_type)
        for committee_name in real_committees:
            act.add_related_entity(name=committee_name,
                                   entity_type="organization")

        parsed = self.parse_vote(description)
        if not parsed:
            continue

        v, motion = parsed
        vote = VoteEvent(
            start_date=date,
            chamber=actor,
            bill=bill_id,
            bill_chamber=bill_chamber,
            legislative_session=session,
            motion_text=motion,
            result='pass' if 'passed' in description.lower() else 'fail',
            classification='passage',
        )
        vote.add_source(url)
        vote.set_count('yes', int(v['n_yes'] or 0))
        vote.set_count('no', int(v['n_no'] or 0))
        vote.set_count('not voting', int(v['n_excused'] or 0))

        # "yes with reservations" is recorded as a plain yes.
        for roll_key, record in (('yes', vote.yes),
                                 ('yes_resv', vote.yes),
                                 ('no', vote.no)):
            for voter in split_specific_votes(v[roll_key]):
                record(voter)
        for voter in split_specific_votes(v['excused']):
            vote.vote('not voting', voter)

        yield vote
def record_votes(root, session, chamber):
    """Yield a VoteEvent for every valid vote ``div`` found under ``root``.

    Candidate elements are selected with the ``vote_selectors`` predicates
    and wrapped in ``MaybeVote``; invalid candidates are skipped.  The
    legislative session is the first two characters of ``session``, and no
    start date is set.
    """
    selector = '//div{}'.format(''.join(vote_selectors))
    for element in root.xpath(selector):
        mv = MaybeVote(element)
        if not mv.is_valid:
            continue

        if mv.passed:
            outcome, kind = 'pass', 'passage'
        else:
            outcome, kind = 'fail', 'other'

        event = VoteEvent(
            chamber=chamber,
            start_date=None,
            motion_text=kind,
            result=outcome,
            classification=kind,
            legislative_session=session[0:2],
            bill=mv.bill_id,
            bill_chamber=mv.chamber,
        )

        event.set_count('yes', mv.yeas or 0)
        event.set_count('no', mv.nays or 0)
        event.set_count('not voting', mv.present or 0)

        for voter in mv.votes['yeas']:
            event.yes(voter)
        for voter in mv.votes['nays']:
            event.no(voter)
        for roll_key, option in (('present', 'not voting'), ('absent', 'absent')):
            for voter in mv.votes[roll_key]:
                event.vote(option, voter)

        yield event
def scrape_votes(self, bill, bill_page, chamber):
    """Yield a VoteEvent for each "view_votes" link found on ``bill_page``.

    The date and motion come from the first two cells of the table row
    holding the link.  A motion mentioning "Passed" or "Advanced" counts as
    a pass.  The linked page lists name / vote-type cells in alternating
    order; vote types are translated through ``VOTE_TYPE_MAP`` with
    ``'other'`` as the default.
    """
    links = bill_page.xpath(
        '//div[contains(@class, "col-sm-8")]//a[contains(@href, "view_votes")]')

    for link in links:
        vote_url = link.attrib['href']

        date_td, motion_td, *_ = link.xpath('ancestor::tr/td')
        date = datetime.strptime(date_td.text, '%b %d, %Y')
        motion_text = motion_td.text_content()

        vote_page = self.lxmlize(vote_url)
        passed = any(word in motion_text for word in ('Passed', 'Advanced'))
        cells = vote_page.xpath('//table[contains(@class, "calendar-table")]//td')

        vote = VoteEvent(
            bill=bill,
            chamber=chamber,
            start_date=TIMEZONE.localize(date),
            motion_text=motion_text,
            classification='passage',
            result='pass' if passed else 'fail',
        )

        # The KeyID query parameter identifies the roll call.
        query = urllib.parse.urlparse(vote_url).query
        vote.pupa_id = urllib.parse.parse_qs(query)['KeyID'][0]
        vote.add_source(vote_url)

        # Cells come in (name, vote type) pairs.
        for start in range(0, len(cells), 2):
            name = cells[start].text
            vote_type = cells[start + 1].text
            if not (name and vote_type):
                continue
            vote.vote(VOTE_TYPE_MAP.get(vote_type.lower(), 'other'), name)

        yield vote
def record_votes(root, session, chamber):
    """Yield a VoteEvent for every valid vote ``div`` found under ``root``.

    Candidate elements are selected with the ``vote_selectors`` predicates
    and wrapped in ``MaybeVote``; invalid candidates are skipped.  The
    legislative session is the first two characters of ``session``, and no
    start date is set.
    """
    selector = "//div{}".format("".join(vote_selectors))
    for element in root.xpath(selector):
        mv = MaybeVote(element)
        if not mv.is_valid:
            continue

        if mv.passed:
            outcome, kind = "pass", "passage"
        else:
            outcome, kind = "fail", "other"

        event = VoteEvent(
            chamber=chamber,
            start_date=None,
            motion_text=kind,
            result=outcome,
            classification=kind,
            legislative_session=session[0:2],
            bill=mv.bill_id,
            bill_chamber=mv.chamber,
        )

        event.set_count("yes", mv.yeas or 0)
        event.set_count("no", mv.nays or 0)
        event.set_count("not voting", mv.present or 0)

        for voter in mv.votes["yeas"]:
            event.yes(voter)
        for voter in mv.votes["nays"]:
            event.no(voter)
        for roll_key, option in (("present", "not voting"), ("absent", "absent")):
            for voter in mv.votes[roll_key]:
                event.vote(option, voter)

        yield event
def build_vote(session, bill_id, url, vote_record, chamber, motion_text):
    """Return a VoteEvent assembled from an already-parsed ``vote_record``.

    ``vote_record`` maps ``'yes'``, ``'no'``, ``'excused'``, ``'absent'``
    and ``'other'`` to collections of voter names, and ``'date'`` to an
    object with ``strftime``.  The vote passes when yes votes outnumber no
    votes.
    """
    options = ('yes', 'no', 'excused', 'absent', 'other')

    # When they vote in a substitute they mark it as XHB
    bill_id = bill_id.replace('XHB', 'HB')

    outcome = 'pass' if len(vote_record['yes']) > len(vote_record['no']) else 'fail'
    bill_chamber = 'lower' if bill_id[0] != 'S' else 'upper'

    vote_event = VoteEvent(
        result=outcome,
        chamber=chamber,
        start_date=vote_record['date'].strftime('%Y-%m-%d'),
        motion_text=motion_text,
        classification='passage',
        legislative_session=session,
        bill=bill_id,
        bill_chamber=bill_chamber,
    )
    vote_event.pupa_id = url

    for option in options:
        vote_event.set_count(option, len(vote_record[option]))

    for option in options:
        for voter in vote_record[option]:
            vote_event.vote(option, voter)

    vote_event.add_source(url)
    return vote_event
def scrape_votes_old(self, bill, billname, session):
    """Yield VoteEvents scraped from the archived bill page for ``billname``.

    Every "JournalText" link on the archive page marks one vote: the link
    text is the date, the neighbouring cell holds "<chamber> - <motion>",
    and the following row holds the Yea and Nay name tables.  An
    unrecognised chamber raises ``ScrapeError``.
    """
    vote_url = ("http://archives.legislature.state.oh.us/bills.cfm?ID=" +
                session + "_" + billname)
    page = lxml.html.fromstring(self.get(vote_url).text)

    chamber_codes = {"House": "lower", "Senate": "upper"}

    def names_in(div):
        # Non-empty cell texts of the name table inside ``div``.
        texts = (td.xpath("string()") for td in div.xpath("table/tr/td"))
        return [text for text in texts if text]

    for jlink in page.xpath("//a[contains(@href, 'JournalText')]"):
        day = self._tz.localize(
            datetime.datetime.strptime(jlink.text, "%m/%d/%Y")).date()
        date = "{:%Y-%m-%d}".format(day)

        details = jlink.xpath("string(../../../td[2])")
        pieces = details.split(" - ")

        chamber_name = pieces[0]
        if chamber_name not in chamber_codes:
            raise ScrapeError("Bad chamber: %s" % chamber_name)
        chamber = chamber_codes[chamber_name]

        motion = pieces[1].split("\n")[0].strip()

        # The names live in the row right after the one holding the link.
        vote_row = jlink.xpath("../../..")[0].getnext()
        yeas = names_in(vote_row.xpath("td/font/div[contains(@id, 'Yea')]")[0])
        nays = names_in(vote_row.xpath("td/font/div[contains(@id, 'Nay')]")[0])

        vote = VoteEvent(
            chamber=chamber,
            start_date=date,
            motion_text=motion,
            result="pass" if len(yeas) > len(nays) else "fail",
            bill=bill,
            classification="passed",
        )

        for voter in yeas:
            vote.yes(voter)
        for voter in nays:
            vote.no(voter)

        vote.add_source(vote_url)
        yield vote
def _parse_senate_votes(self, vote_data, bill, url):
    """Convert one Senate vote record into a VoteEvent.

    ``vote_data`` must have a ``voteType`` of ``FLOOR`` or ``COMMITTEE``
    (anything else raises ``ValueError``), a ``voteDate`` in ``YYYY-MM-DD``
    form, and member votes grouped by code under
    ``memberVotes -> items``.  ``AYE`` and ``AYEWR`` count as yes, ``NAY``
    as no, and ``EXC`` / ``ABS`` / ``ABD`` as other.  The result is decided
    by comparing the yes and no tallies.
    """
    vote_type = vote_data["voteType"]
    if vote_type == "FLOOR":
        motion = "Floor Vote"
    elif vote_type == "COMMITTEE":
        motion = "{} Vote".format(vote_data["committee"]["name"])
    else:
        raise ValueError("Unknown vote type encountered.")

    if vote_data["version"]:
        motion += " - Version: " + vote_data["version"]

    vote_day = datetime.datetime.strptime(vote_data["voteDate"], "%Y-%m-%d")

    vote = VoteEvent(
        chamber="upper",
        start_date=vote_day.strftime("%Y-%m-%d"),
        motion_text=motion,
        classification="passage",
        # Placeholder; replaced below once the tallies are known.
        result="fail",
        bill=bill,
    )
    vote.add_source(url)

    vote_rolls = vote_data["memberVotes"]["items"]
    yes_count = no_count = other_count = 0

    # Count all yea votes ("AYEWR" is recorded as a plain yes).
    for code in ("AYE", "AYEWR"):
        if "items" in vote_rolls.get(code, {}):
            for legislator in vote_rolls[code]["items"]:
                vote.yes(legislator["fullName"])
                yes_count += 1

    # Count all nay votes.
    if "items" in vote_rolls.get("NAY", {}):
        for legislator in vote_rolls["NAY"]["items"]:
            vote.no(legislator["fullName"])
            no_count += 1

    # Count all other types of votes.
    for code in ("EXC", "ABS", "ABD"):
        if vote_rolls.get(code, []):
            for legislator in vote_rolls[code]["items"]:
                vote.vote("other", legislator["fullName"])
                other_count += 1

    vote.result = "pass" if yes_count > no_count else "fail"
    vote.set_count("yes", yes_count)
    vote.set_count("no", no_count)
    vote.set_count("other", other_count)

    return vote
def test_vote_event_org_chamber():
    """A ``chamber`` keyword becomes a pseudo-id keyed on classification."""
    ve = VoteEvent(
        legislative_session="2009",
        motion_text="passage of the bill",
        start_date="2009-01-07",
        result='pass',
        classification='bill-passage',
        chamber='upper',
    )
    expected = {'classification': 'upper'}
    assert get_pseudo_id(ve.organization) == expected
def toy_vote_event():
    """Return a minimal VoteEvent with a single source, for use in tests."""
    event = VoteEvent(
        legislative_session="2009",
        motion_text="passage of the bill",
        start_date="2009-01-07",
        result='pass',
        classification='bill-passage',
    )
    event.add_source("http://uri.example.com/", note="foo")
    return event
def scrape_vote(self, bill, date, motion, url):
    """Scrape the vote page at ``url`` and yield a VoteEvent for ``bill``.

    ``date`` is used as the vote's start date.  Objects with ``strftime``
    are rendered as ``YYYY-MM-DD``; anything else is passed through
    unchanged.  Previously a fixed date was recorded for every vote.

    This is best-effort: pages marked "not yet official", dead links and
    unparsable pages yield nothing and do not raise.
    """
    try:
        page = self.get(url).text
        if 'not yet official' in page:
            # Sometimes they link to vote pages before they go live
            return

        page = lxml.html.fromstring(page)
        actor = 'upper' if url.endswith('Senate') else 'lower'

        votevals = ['yes', 'no', 'not voting', 'other']
        count_path = "string(//td[@align = 'center' and contains(., '%s: ')])"
        yes_count = int(page.xpath(count_path % "Yeas").split()[-1])
        no_count = int(page.xpath(count_path % "Nays").split()[-1])
        not_voting_count = int(
            page.xpath(count_path % "Non Voting").split()[-1])
        other_count = int(
            page.xpath(count_path % "Present").split()[-1])

        # A vote passes only when yeas outnumber everything else combined.
        passed = yes_count > no_count + not_voting_count + other_count

        if hasattr(date, 'strftime'):
            start_date = date.strftime('%Y-%m-%d')
        else:
            start_date = date

        vote = VoteEvent(start_date=start_date,
                         motion_text=motion,
                         result='pass' if passed else 'fail',
                         classification='passage',
                         chamber=actor,
                         bill=bill)

        # The "Excused" total is optional; it is recorded only when the
        # page has one.
        try:
            excused_count = int(
                page.xpath(count_path % "Excused").split()[-1])
        except (IndexError, ValueError):
            pass
        else:
            vote.set_count('excused', excused_count)
            votevals.append('excused')

        vote.set_count('yes', yes_count)
        vote.set_count('no', no_count)
        vote.set_count('not voting', not_voting_count)
        vote.set_count('other', other_count)
        vote.add_source(url)

        # Voter tables appear in the same order as ``votevals``.
        xpath = ('//*[contains(@class, "ms-standardheader")]/'
                 'following-sibling::table')
        divs = page.xpath(xpath)

        for (voteval, div) in zip(votevals, divs):
            for a in div.xpath('.//a'):
                name = a.text_content().strip()
                if name:
                    vote.vote(voteval, name)

        yield vote
    except Exception:
        # sometimes the link is there but is dead
        return
def parse_roll_call(self, bill, link, chamber, date):
    """Fetch the roll call behind ``link`` and return it as a VoteEvent.

    The short motion code on the page decides the classification
    (``passage``, ``amendment`` or ``other``), while the link text is used
    as the motion text.  Leave ("LVE") and not-voting ("N/V") totals are
    combined into ``other``.  An element whose class names no known vote
    value raises ``Exception``.
    """
    url = link.attrib['href']
    page = lxml.html.fromstring(self.get(url).text)

    code = page.xpath('string(//div[@class="Column-OneFourth"]/div[3])').strip()
    code = re.sub(r'\s+', ' ', code)
    if code == 'FP':
        code = 'FINAL PASSAGE'

    if code == 'FINAL PASSAGE':
        classification = 'passage'
    elif re.match(r'CONCUR(RENCE)? IN \w+ AMENDMENTS', code):
        classification = 'amendment'
    else:
        classification = 'other'

    motion = link.text_content()

    def tally(label):
        # The number sits in the element right after the label div.
        return int(page.xpath("//div[text() = '%s']" % label)[0].getnext().text)

    yeas = tally('YEAS')
    nays = tally('NAYS')
    lve = tally('LVE')
    nv = tally('N/V')
    other = lve + nv

    vote = VoteEvent(
        chamber=chamber,
        start_date=tz.localize(date),
        motion_text=motion,
        classification=classification,
        result='pass' if yeas > (nays + other) else 'fail',
        bill=bill,
    )
    vote.add_source(url)
    vote.set_count('yes', yeas)
    vote.set_count('no', nays)
    vote.set_count('other', other)

    # First matching fragment of the class attribute wins.
    class_options = (('yea', 'yes'), ('nay', 'no'),
                     ('nvote', 'other'), ('lve', 'other'))
    for div in page.xpath('//*[contains(@class, "RollCalls-Vote")]'):
        name = div.text_content().strip()
        name = re.sub(r'^[\s,]+', '', name)
        name = re.sub(r'[\s,]+$', '', name)

        class_attr = div.attrib['class'].lower()
        for fragment, option in class_options:
            if fragment in class_attr:
                voteval = option
                break
        else:
            raise Exception('Unrecognized vote val: %s' % class_attr)

        vote.vote(voteval, name)

    return vote
def scrape_vote(self, bill, vote_id, session):
    """Fetch roll call ``vote_id`` from the JSON endpoint and yield a VoteEvent.

    Nothing is yielded when the endpoint returns an empty payload.  Not
    voting, vacant, absent and conflict totals are combined into ``other``.
    Voter names are resolved through ``self.legislators_by_short``; a
    short name that cannot be resolved is logged and used as-is.
    """
    vote_url = 'https://legis.delaware.gov/json/RollCall/GetRollCallVoteByRollCallId'
    form = {
        'rollCallId': vote_id,
        'sort': '',
        'group': '',
        'filter': '',
    }

    page = self.post(url=vote_url, data=form, allow_redirects=True).json()
    if not page:
        return

    roll = page['Model']
    vote_chamber = self.chamber_map[roll['ChamberName']]

    # "7/1/16 01:00 AM"
    taken_at = dt.datetime.strptime(roll['TakenAtDateTime'], '%m/%d/%y %I:%M %p')
    vote_date = taken_at.strftime('%Y-%m-%d')

    # TODO: What does this code mean?
    vote_motion = roll['RollCallVoteType']

    vote_passed = 'pass' if roll['RollCallStatus'] == 'Passed' else 'fail'
    other_count = sum(
        int(roll[field])
        for field in ('NotVotingCount', 'VacantVoteCount',
                      'AbsentVoteCount', 'ConflictVoteCount')
    )

    vote = VoteEvent(
        chamber=vote_chamber,
        start_date=vote_date,
        motion_text=vote_motion,
        result=vote_passed,
        classification='other',
        bill=bill,
        legislative_session=session,
    )
    vote.add_source(vote_url)
    vote.set_count('yes', roll['YesVoteCount'])
    vote.set_count('no', roll['NoVoteCount'])
    vote.set_count('other', other_count)

    for row in roll['AssemblyMemberVotes']:
        # AssemblyMemberId looks like it should work here,
        # but for some sessions it's bugged to only return session
        try:
            voter = self.legislators_by_short[str(row['ShortName'])]
            name = voter['DisplayName']
        except KeyError:
            self.warning('could not find legislator short name %s',
                         row['ShortName'])
            name = row['ShortName']

        code = row['SelectVoteTypeCode']
        if code == 'Y':
            vote.yes(name)
        elif code == 'N':
            vote.no(name)
        else:
            vote.vote('other', name)

    yield vote
def test_vote_event_org_obj():
    """An Organization object passed as ``organization`` is stored by its id."""
    committee = Organization('something', classification='committee')
    ve = VoteEvent(
        legislative_session="2009",
        motion_text="passage of the bill",
        start_date="2009-01-07",
        result='pass',
        classification='bill-passage',
        organization=committee,
    )
    assert ve.organization == committee._id
def scrape_vote(self, chamber, session, bill_id, vote_url):
    """Scrape a House vote page and yield a VoteEvent for ``bill_id``.

    Nothing is yielded when the request was redirected to the "no vote
    found" page, when the page has no motion heading, or when none of its
    paragraphs parses as an ``MM/DD/YYYY`` date.  The first table on the
    page lists the yeas and the second the nays.
    """
    NO_VOTE_URL = 'http://www.house.leg.state.mn.us/votes/novotefound.asp'

    resp = self.get(vote_url)
    html = resp.text

    # sometimes the link is broken, will redirect to NO_VOTE_URL
    if resp.url == NO_VOTE_URL:
        return

    doc = lxml.html.fromstring(html)

    headings = doc.xpath("//div[@id='leg_PageContent']/div/h2/text()")
    try:
        motion = headings[0]
    except IndexError:
        self.logger.warning("Bill was missing a motion number, skipping")
        return

    # The second h3 holds the tallies; yeas are word 0 and nays word 3.
    tally_words = doc.xpath(
        ".//div[@id='leg_PageContent']/div/h3/text()")[1].split()
    yeas = int(tally_words[0])
    nays = int(tally_words[3])

    # second paragraph has date
    for paragraph in doc.xpath(".//div[@id='leg_PageContent']/div/p/text()"):
        try:
            date = datetime.datetime.strptime(
                paragraph.strip(), '%m/%d/%Y').date()
        except ValueError:
            continue
        break
    else:
        self.logger.warning("No date could be found for vote on %s" % motion)
        return

    vote = VoteEvent(
        chamber='lower',
        start_date=date,
        motion_text=motion,
        result='pass' if yeas > nays else 'fail',
        classification='passage',
        legislative_session=session,
        bill=bill_id,
        bill_chamber=chamber,
    )
    vote.set_count('yes', yeas)
    vote.set_count('no', nays)
    vote.add_source(vote_url)
    vote.pupa_id = vote_url

    # first table has YEAs, second table is nays
    for table_xpath, record in (('//table[1]/tr/td/font/text()', vote.yes),
                                ('//table[2]/tr/td/font/text()', vote.no)):
        for name in doc.xpath(table_xpath):
            record(name.strip())

    yield vote
def scrape_vote(self, bill, vote_json, session):
    """Yield a VoteEvent built from one roll-call JSON record.

    The motion is the record's action, prefixed with the amendment number
    when there is one.  The vote passes when the yes count exceeds the no
    count.  Voter names arrive as comma-separated strings; each name is
    stripped of surrounding whitespace and blank entries are skipped.
    Conflict votes are recorded as ``other``.
    """
    if vote_json['amendmentNumber']:
        motion = '{}: {}'.format(
            vote_json['amendmentNumber'], vote_json['action'])
    else:
        motion = vote_json['action']

    result = 'pass' if vote_json['yesVotesCount'] > vote_json['noVotesCount'] else 'fail'

    v = VoteEvent(
        chamber=self.chamber_abbrev_map[vote_json['chamber']],
        start_date=self.parse_local_date(vote_json['voteDate']),
        motion_text=motion,
        result=result,
        legislative_session=session,
        bill=bill,
        classification='other',
    )

    v.set_count(option='yes', value=vote_json['yesVotesCount'])
    v.set_count('no', vote_json['noVotesCount'])
    v.set_count('absent', vote_json['absentVotesCount'])
    v.set_count('excused', vote_json['excusedVotesCount'])
    v.set_count('other', vote_json['conflictVotesCount'])

    for name in vote_json['yesVotes'].split(','):
        name = name.strip()
        if name:
            v.yes(name)

    for name in vote_json['noVotes'].split(','):
        name = name.strip()
        if name:
            v.no(name)

    # add votes with other classifications
    # option can be 'yes', 'no', 'absent',
    # 'abstain', 'not voting', 'paired', 'excused'
    # Names are stripped here too, so "A, B" records "B" and not " B".
    for names_key, option in (('absentVotes', 'absent'),
                              ('excusedVotes', 'excused'),
                              ('conflictVotes', 'other')):
        for name in vote_json[names_key].split(','):
            name = name.strip()
            if name:
                v.vote(option=option, voter=name)

    source_url = 'http://lso.wyoleg.gov/Legislation/{}/{}'.format(
        session, vote_json['billNumber'])
    v.add_source(source_url)

    yield v
def scrape_senate_vote(self, bill, url, date):
    """Scrape a Senate roll-call PDF and yield one VoteEvent for ``bill``.

    The PDF at ``url`` is downloaded and converted to text.  When the text
    contains a ``Yea: n  Nay: n  Absent: n`` summary line the work is
    delegated to ``scrape_senate_vote_3col``.  Otherwise the names that
    follow each ``Yea(s):`` / ``Nay(s):`` / ``Absent(s):`` label are
    recorded, and the result is recomputed from the tallies.

    If the file cannot be retrieved, a warning is logged and nothing is
    yielded.
    """
    try:
        filename, resp = self.urlretrieve(url)
    except scrapelib.HTTPError:
        self.warning("missing vote file %s" % url)
        return

    vote = VoteEvent(
        chamber='upper',
        start_date=date.strftime("%Y-%m-%d"),
        motion_text='Passage',
        # setting 'fail' for now.
        result='fail',
        classification='passage',
        bill=bill,
    )
    vote.add_source(url)
    vote.pupa_id = url

    # Always delete the downloaded file, even if the conversion fails.
    try:
        text = convert_pdf(filename, 'text').decode('utf-8')
    finally:
        os.remove(filename)

    if re.search(r'Yea:\s+\d+\s+Nay:\s+\d+\s+Absent:\s+\d+', text):
        yield from self.scrape_senate_vote_3col(bill, vote, text, url, date)
        return

    # With one capture group, re.split returns
    # [preamble, label, names, label, names, ...].  The preamble is not a
    # vote section, so pair each label with the text that follows it.
    # (The previous filter()/pop() approach fails on Python 3, where
    # filter() returns an iterator with no pop().)
    sections = re.split(r'(Yea|Nay|Absent)s?:', text)
    keymap = dict(yea='yes', nay='no')
    actual_vote = collections.defaultdict(int)
    vote_count = {'yes': 0, 'no': 0, 'other': 0}

    for vote_val, values in zip(sections[1::2], sections[2::2]):
        key = keymap.get(vote_val.lower(), 'other')
        for name in re.split(r'(?:[\s,]+and\s|[\s,]{2,})', values):
            if name.lower().strip() == 'none.':
                continue
            name = name.replace('..', '')
            name = re.sub(r'\.$', '', name)
            # Drop tallies, dashes and padding surrounding the name.
            name = name.strip('-1234567890 \n')
            if not name:
                continue
            vote.vote(key, name)
            actual_vote[vote_val] += 1
            vote_count[key] += 1
        # Sanity check: per-label tally must agree with per-option tally.
        assert actual_vote[vote_val] == vote_count[key]

    for key, value in vote_count.items():
        vote.set_count(key, value)

    # updating result with actual value
    vote.result = 'pass' if vote_count['yes'] > (
        vote_count['no'] + vote_count['other']) else 'fail'

    yield vote
def handle_page(self):
    """Parse a committee vote page from ``self.doc`` and yield a VoteEvent.

    The date, committee and action are read from spans with fixed ids, and
    the totals from the text of the last nested table.  The motion text is
    ``"<action> (<committee>)"`` and the vote passes when yeas outnumber
    nays.  A member vote code other than ``Y``, ``N``, ``-`` or a
    parenthesised ``(Y)`` / ``(N)`` raises ``ValueError``.
    """
    (date, ) = self.doc.xpath(
        '//span[@id="ctl00_ContentPlaceHolder1_lblDate"]/text()')
    date = format_datetime(
        datetime.datetime.strptime(date, '%m/%d/%Y %I:%M:%S %p'),
        'US/Eastern')

    # Collapse all whitespace so the totals can be matched on one line.
    totals = self.doc.xpath('//table//table')[-1].text_content()
    totals = re.sub(r'(?mu)\s+', " ", totals).strip()
    # Both fragments are raw strings; "\s" and "\d" are invalid escape
    # sequences in an ordinary string literal.
    (yes_count, no_count, other_count) = [
        int(x) for x in re.search(
            r'(?m)Total Yeas:\s+(\d+)\s+Total Nays:\s+(\d+)\s+'
            r'Total Missed:\s+(\d+)', totals).groups()
    ]
    result = 'pass' if yes_count > no_count else 'fail'

    (committee, ) = self.doc.xpath(
        '//span[@id="ctl00_ContentPlaceHolder1_lblCommittee"]/text()')
    (action, ) = self.doc.xpath(
        '//span[@id="ctl00_ContentPlaceHolder1_lblAction"]/text()')
    motion = "{} ({})".format(action, committee)

    vote = VoteEvent(
        start_date=date,
        bill=self.kwargs['bill'],
        chamber='lower',
        motion_text=motion,
        result=result,
        classification='committee',
    )
    vote.add_source(self.url)
    vote.set_count('yes', yes_count)
    vote.set_count('no', no_count)
    vote.set_count('not voting', other_count)

    for cell in self.doc.xpath('//table//table//table//td'):
        if not cell.text_content().strip():
            continue

        # Second span is the member name, first span is the vote code.
        (member, ) = cell.xpath('span[2]//text()')
        (member_vote, ) = cell.xpath('span[1]//text()')

        if member_vote == "Y":
            vote.yes(member)
        elif member_vote == "N":
            vote.no(member)
        elif member_vote == "-":
            vote.vote('not voting', member)
        # Parenthetical votes appear to not be counted in the
        # totals for Yea, Nay, _or_ Missed
        elif re.search(r'\([YN]\)', member_vote):
            continue
        else:
            raise ValueError(
                "Unknown vote type found: {}".format(member_vote))

    yield vote
def _parse_senate_votes(self, vote_data, bill, url):
    """Convert one Senate vote record into a VoteEvent.

    ``vote_data`` must have a ``voteType`` of ``FLOOR`` or ``COMMITTEE``
    (anything else raises ``ValueError``), a ``voteDate`` in ``YYYY-MM-DD``
    form, and member votes grouped by code under
    ``memberVotes -> items``.  ``AYE`` and ``AYEWR`` count as yes, ``NAY``
    as no, and ``EXC`` / ``ABS`` / ``ABD`` as other.  The result is decided
    by comparing the yes and no tallies.
    """
    vote_type = vote_data['voteType']
    if vote_type == 'FLOOR':
        motion = 'Floor Vote'
    elif vote_type == 'COMMITTEE':
        motion = '{} Vote'.format(vote_data['committee']['name'])
    else:
        raise ValueError('Unknown vote type encountered.')

    vote_day = datetime.datetime.strptime(vote_data['voteDate'], '%Y-%m-%d')

    vote = VoteEvent(
        chamber='upper',
        start_date=vote_day.strftime('%Y-%m-%d'),
        motion_text=motion,
        classification='passage',
        # Placeholder; replaced below once the tallies are known.
        result='fail',
        bill=bill,
    )
    vote.add_source(url)

    vote_rolls = vote_data['memberVotes']['items']
    yes_count = no_count = other_count = 0

    # Count all yea votes ('AYEWR' is recorded as a plain yes).
    for code in ('AYE', 'AYEWR'):
        if 'items' in vote_rolls.get(code, {}):
            for legislator in vote_rolls[code]['items']:
                vote.yes(legislator['fullName'])
                yes_count += 1

    # Count all nay votes.
    if 'items' in vote_rolls.get('NAY', {}):
        for legislator in vote_rolls['NAY']['items']:
            vote.no(legislator['fullName'])
            no_count += 1

    # Count all other types of votes.
    for code in ('EXC', 'ABS', 'ABD'):
        if vote_rolls.get(code, []):
            for legislator in vote_rolls[code]['items']:
                vote.vote('other', legislator['fullName'])
                other_count += 1

    vote.result = 'pass' if yes_count > no_count else 'fail'
    vote.set_count('yes', yes_count)
    vote.set_count('no', no_count)
    vote.set_count('other', other_count)

    return vote
def parse_vote(self, bill, link):
    """Scrape the vote page at ``link`` and yield a VoteEvent for ``bill``.

    The first ``h3`` heading gives the chamber, the status text and the
    date; later ``h3`` headings carry the yea / nay / present / absent
    counts in parentheses.  Member links (after the first one) are assigned
    by position: the first ``yes_count`` are yes votes, the next
    ``no_count`` are no votes and the rest are ``other``.  When the page
    has no ``h3`` headings a warning is logged and nothing is yielded.
    """
    member_doc = lxml.html.fromstring(self.get(link).text)
    motion = member_doc.xpath("//div[@id='main_content']/h4/text()")
    opinions = member_doc.xpath("//div[@id='main_content']/h3/text()")

    if not opinions:
        self.warning("No Votes for: %s", link)
        return

    words = opinions[0].split()
    vote_chamber = 'upper' if words[0] == 'Senate' else 'lower'
    vote_date = datetime.datetime.strptime(words[-1], '%m/%d/%Y')
    vote_status = " ".join(words[2:-2])
    if not vote_status.strip():
        # Fall back to the h4 motion text when the heading has no status.
        vote_status = motion[0]

    for heading in opinions:
        try:
            count = int(heading[heading.find("(") + 1:heading.find(")")])
        except ValueError:
            # This is likely not a vote-count text chunk
            # It's probably '`On roll call the vote was:`
            continue

        lowered = heading.lower()
        if "yea" in lowered:
            yes_count = count
        elif "nay" in lowered:
            no_count = count
        elif "present" in lowered:
            p_count = count
        elif "absent" in lowered:
            a_count = count

    vote = VoteEvent(
        bill=bill,
        start_date=vote_date.strftime('%Y-%m-%d'),
        chamber=vote_chamber,
        motion_text=vote_status,
        result='pass' if yes_count > no_count else 'fail',
        classification='passage',
    )
    vote.pupa_id = link
    vote.set_count('yes', yes_count)
    vote.set_count('no', no_count)
    vote.set_count('abstain', p_count)
    vote.set_count('absent', a_count)
    vote.add_source(link)

    a_links = member_doc.xpath("//div[@id='main_content']/a/text()")
    # The first link is skipped; positions are counted from 1.
    for position, link_text in enumerate(a_links[1:], start=1):
        if position <= yes_count:
            option = 'yes'
        elif position <= yes_count + no_count:
            option = 'no'
        else:
            option = 'other'
        # Only the first word of the comma-free link text is the name.
        vote.vote(option, re.sub(',', '', link_text).split()[0])

    yield vote
def test_org_and_chamber_conflict():
    """Passing both ``organization`` and ``chamber`` raises ValueError."""
    conflicting = dict(organization='test', chamber='lower')
    with pytest.raises(ValueError):
        VoteEvent(
            legislative_session="2009",
            motion_text="passage of the bill",
            start_date="2009-01-07",
            result='pass',
            classification='passage',
            **conflicting
        )
def test_vote_event_org_dict():
    """A dict passed as ``organization`` round-trips through the pseudo-id."""
    odict = {'name': 'Random Committee', 'classification': 'committee'}
    ve = VoteEvent(
        legislative_session="2009",
        motion_text="passage of the bill",
        start_date="2009-01-07",
        result='pass',
        classification='bill-passage',
        organization=odict,
    )
    recovered = get_pseudo_id(ve.organization)
    assert recovered == odict
def scrape_vote(self, bill, vote_json, session):
    """Yield a VoteEvent built from one roll-call JSON record.

    The motion is the record's action, prefixed with the amendment number
    when there is one.  The vote passes when the yes count exceeds the no
    count.  Voter names arrive as comma-separated strings; each name is
    stripped of surrounding whitespace and blank entries are skipped.
    Conflict votes are recorded as ``other``.
    """
    if vote_json["amendmentNumber"]:
        motion = "{}: {}".format(vote_json["amendmentNumber"], vote_json["action"])
    else:
        motion = vote_json["action"]

    result = (
        "pass"
        if vote_json["yesVotesCount"] > vote_json["noVotesCount"]
        else "fail"
    )

    v = VoteEvent(
        chamber=self.chamber_abbrev_map[vote_json["chamber"]],
        start_date=self.parse_local_date(vote_json["voteDate"]),
        motion_text=motion,
        result=result,
        legislative_session=session,
        bill=bill,
        classification="other",
    )

    v.set_count(option="yes", value=vote_json["yesVotesCount"])
    v.set_count("no", vote_json["noVotesCount"])
    v.set_count("absent", vote_json["absentVotesCount"])
    v.set_count("excused", vote_json["excusedVotesCount"])
    v.set_count("other", vote_json["conflictVotesCount"])

    for name in vote_json["yesVotes"].split(","):
        name = name.strip()
        if name:
            v.yes(name)

    for name in vote_json["noVotes"].split(","):
        name = name.strip()
        if name:
            v.no(name)

    # add votes with other classifications
    # option can be 'yes', 'no', 'absent',
    # 'abstain', 'not voting', 'paired', 'excused'
    # Names are stripped here too, so "A, B" records "B" and not " B".
    for names_key, option in (
        ("absentVotes", "absent"),
        ("excusedVotes", "excused"),
        ("conflictVotes", "other"),
    ):
        for name in vote_json[names_key].split(","):
            name = name.strip()
            if name:
                v.vote(option=option, voter=name)

    source_url = "http://lso.wyoleg.gov/Legislation/{}/{}".format(
        session, vote_json["billNumber"]
    )
    v.add_source(source_url)

    yield v
def handle_page(self):
    """Parse a committee vote page from ``self.doc`` and yield a VoteEvent.

    The date, totals, committee and action are read from spans whose ids
    contain the matching ``lbl…`` fragment.  The motion text is
    ``"<action> (<committee>)"`` and the vote passes when yeas outnumber
    nays.  A member vote code other than ``Y``, ``N``, ``-`` or a
    parenthesised ``(Y)`` / ``(N)`` raises ``ValueError``.
    """
    def span_texts(fragment):
        # Text nodes of every span whose id contains ``fragment``.
        return self.doc.xpath('//span[contains(@id, "%s")]/text()' % fragment)

    raw_date, = span_texts("lblDate")
    date = format_datetime(
        datetime.datetime.strptime(raw_date, '%m/%d/%Y %I:%M:%S %p'),
        'US/Eastern')

    yes_count = int(span_texts("lblYeas")[0])
    no_count = int(span_texts("lblNays")[0])
    other_count = int(span_texts("lblMissed")[0])
    result = 'fail' if not yes_count > no_count else 'pass'

    committee, = span_texts("lblCommittee")
    action, = span_texts("lblAction")
    motion = "{} ({})".format(action, committee)

    vote = VoteEvent(
        start_date=date,
        bill=self.kwargs['bill'],
        chamber='lower',
        motion_text=motion,
        result=result,
        classification='committee',
    )
    vote.add_source(self.url)
    vote.set_count('yes', yes_count)
    vote.set_count('no', no_count)
    vote.set_count('not voting', other_count)

    recorders = {
        "Y": vote.yes,
        "N": vote.no,
        "-": lambda voter: vote.vote('not voting', voter),
    }

    for item in self.doc.xpath('//ul[contains(@class, "vote-list")]/li'):
        if not item.text_content().strip():
            continue

        # Second span is the member name, first span is the vote code.
        member, = item.xpath('span[2]//text()')
        code, = item.xpath('span[1]//text()')

        if code in recorders:
            recorders[code](member)
        # Parenthetical votes appear to not be counted in the
        # totals for Yea, Nay, _or_ Missed
        elif not re.search(r'\([YN]\)', code):
            raise ValueError(
                "Unknown vote type found: {}".format(code))

    yield vote