def _parse_senate_votes(self, vote_data, bill, url):
    """Turn one senate vote record into a ``VoteEvent``.

    Args:
        vote_data: Mapping describing the vote. The keys read here are
            ``voteDate`` (``YYYY-MM-DD``), ``voteType`` (``"FLOOR"`` or
            ``"COMMITTEE"``), ``committee["name"]`` (committee votes only),
            ``version`` and ``memberVotes["items"]``.
        bill: Passed straight through to ``VoteEvent(bill=...)``.
        url: Added to the vote event as its source.

    Returns:
        The populated ``VoteEvent``; its result is ``"pass"`` only when the
        yes tally is strictly greater than the no tally.

    Raises:
        ValueError: If ``voteType`` is neither ``"FLOOR"`` nor
            ``"COMMITTEE"`` (or ``voteDate`` does not match the format).
    """
    # Parsing first means a malformed date fails before anything is built.
    parsed_date = datetime.datetime.strptime(vote_data["voteDate"], "%Y-%m-%d")

    kind = vote_data["voteType"]
    if kind == "FLOOR":
        motion = "Floor Vote"
    elif kind == "COMMITTEE":
        motion = "{} Vote".format(vote_data["committee"]["name"])
    else:
        raise ValueError("Unknown vote type encountered.")

    version = vote_data["version"]
    if version:
        motion = motion + " - Version: " + version

    vote = VoteEvent(
        chamber="upper",
        start_date=parsed_date.strftime("%Y-%m-%d"),
        motion_text=motion,
        classification="passage",
        result="fail",
        bill=bill,
    )
    vote.add_source(url)

    rolls = vote_data["memberVotes"]["items"]
    tally = {"yes": 0, "no": 0, "other": 0}

    # Yea-type and nay rolls: (roll key, tally bucket, recorder). A roll is
    # only walked when it actually carries an "items" entry.
    guarded_rolls = (
        ("AYE", "yes", vote.yes),
        ("AYEWR", "yes", vote.yes),
        ("NAY", "no", vote.no),
    )
    for roll_key, bucket, record in guarded_rolls:
        if "items" not in rolls.get(roll_key, {}):
            continue
        for member in rolls[roll_key]["items"]:
            record(member["fullName"])
            tally[bucket] += 1

    # Every remaining roll type is lumped together as "other". These rolls
    # are walked whenever the entry is present and non-empty.
    for roll_key in ("EXC", "ABS", "ABD"):
        if not rolls.get(roll_key, []):
            continue
        for member in rolls[roll_key]["items"]:
            vote.vote("other", member["fullName"])
            tally["other"] += 1

    vote.result = "pass" if tally["yes"] > tally["no"] else "fail"
    vote.set_count("yes", tally["yes"])
    vote.set_count("no", tally["no"])
    vote.set_count("other", tally["other"])
    return vote
def scrape_senate_vote(self, bill, url, date):
    """Scrape a senate roll-call PDF and yield the resulting vote event(s).

    The PDF at ``url`` is downloaded, converted to text and removed again.
    If the text carries a ``Yea: n  Nay: n  Absent: n`` summary line the work
    is delegated to ``scrape_senate_vote_3col``; otherwise the text is split
    on its ``Yea``/``Nay``/``Absent`` labels and every name that follows a
    label is recorded under that label.

    Args:
        bill: Passed straight through to ``VoteEvent(bill=...)``.
        url: Location of the vote PDF; also used as source and ``pupa_id``.
        date: Object with ``strftime``; formatted as the vote's start date.

    Yields:
        The populated ``VoteEvent``. Nothing is yielded when the download
        fails with ``scrapelib.HTTPError`` (a warning is logged instead).
    """
    try:
        filename, _resp = self.urlretrieve(url)
    except scrapelib.HTTPError:
        self.warning("missing vote file %s" % url)
        return

    vote = VoteEvent(
        chamber="upper",
        start_date=date.strftime("%Y-%m-%d"),
        motion_text="Passage",
        # setting 'fail' for now; the real result is computed below.
        result="fail",
        classification="passage",
        bill=bill,
    )
    vote.add_source(url)
    vote.pupa_id = url

    # Always clean up the downloaded file, even if the conversion blows up.
    try:
        text = convert_pdf(filename, "text").decode("utf-8")
    finally:
        os.remove(filename)

    if re.search(r"Yea:\s+\d+\s+Nay:\s+\d+\s+Absent:\s+\d+", text):
        yield from self.scrape_senate_vote_3col(bill, vote, text, url, date)
        return

    # With a capturing group, re.split returns
    #   [preamble, label, names, label, names, ...]
    # so labels sit at the odd indexes and their name lists right after.
    # Pairing them by position (rather than filtering out empty chunks and
    # popping) keeps labels and names aligned when the preamble is non-empty
    # or a label has no names, and avoids calling list methods on the lazy
    # iterator that filter() returns on Python 3.
    parts = re.split(r"(Yea|Nay|Absent)s?:", text)
    keymap = {"yea": "yes", "nay": "no"}
    vote_count = {"yes": 0, "no": 0, "other": 0}

    for label, values in zip(parts[1::2], parts[2::2]):
        key = keymap.get(label.lower(), "other")
        for name in re.split(r"(?:[\s,]+and\s|[\s,]{2,})", values):
            if name.lower().strip() == "none.":
                continue
            # Strip dot leaders, a trailing period, and surrounding
            # digits/dashes/whitespace left over from the PDF layout.
            name = name.replace("..", "")
            name = re.sub(r"\.$", "", name)
            name = name.strip("-1234567890 \n")
            if not name:
                continue
            vote.vote(key, name)
            vote_count[key] += 1

    for key, value in vote_count.items():
        vote.set_count(key, value)

    # updating result with actual value: yes votes must outnumber everything
    # else (no + other) for the vote to pass.
    vote.result = (
        "pass"
        if vote_count["yes"] > (vote_count["no"] + vote_count["other"])
        else "fail"
    )
    yield vote
def test_vote_event_bill_id_dedupe():
    """Vote events that differ only in their bill are imported separately.

    Re-importing an identical event is a no-op, changing its result is an
    update, and the same event attached to a second bill is a fresh insert.
    """
    create_jurisdiction()
    first_bill = Bill.objects.create(
        id="bill-1",
        identifier="HB 1",
        legislative_session=LegislativeSession.objects.get(),
        from_organization=Organization.objects.get(classification="lower"),
    )
    second_bill = Bill.objects.create(
        id="bill-2",
        identifier="HB 2",
        legislative_session=LegislativeSession.objects.get(),
        from_organization=Organization.objects.get(classification="lower"),
    )

    def scraped_event_for(bill_identifier):
        # Everything but the bill is held constant between the two events.
        return ScrapeVoteEvent(
            legislative_session="1900",
            start_date="2013",
            classification="anything",
            result="passed",
            motion_text="a vote on something",
            bill=bill_identifier,
            bill_chamber="lower",
            chamber="lower",
        )

    bill_importer = BillImporter("jid")

    def import_outcome(scraped):
        # A brand-new importer is used for every import, as a separate run would.
        importer = VoteEventImporter("jid", bill_importer)
        _, outcome = importer.import_item(scraped.as_dict())
        return outcome

    scraped = scraped_event_for(first_bill.identifier)

    # first sighting -> insert
    assert import_outcome(scraped) == "insert"
    assert VoteEvent.objects.count() == 1

    # identical data -> nothing to do
    assert import_outcome(scraped) == "noop"
    assert VoteEvent.objects.count() == 1

    # changed result on the same event -> update in place
    scraped.result = "failed"
    assert import_outcome(scraped) == "update"
    assert VoteEvent.objects.count() == 1

    # same details on a different bill -> a second, separate vote event
    scraped = scraped_event_for(second_bill.identifier)
    assert import_outcome(scraped) == "insert"
    assert VoteEvent.objects.count() == 2
def test_vote_event_pupa_identifier_dedupe():
    """Vote events carrying a ``pupa_id`` are deduplicated on that id.

    While ``pupa_id`` stays the same, changes to other fields (including
    the event's own ``identifier``) are updates; a new ``pupa_id`` inserts.
    """
    jurisdiction = create_jurisdiction()
    jurisdiction.legislative_sessions.create(name="1900", identifier="1900")
    Organization.objects.create(
        id="org-id",
        name="Legislature",
        classification="legislature",
        jurisdiction=jurisdiction,
    )

    scraped = ScrapeVoteEvent(
        legislative_session="1900",
        start_date="2013",
        classification="anything",
        result="passed",
        motion_text="a vote on something",
        identifier="Roll Call No. 1",
    )
    scraped.pupa_id = "foo"

    mock_importer = DumbMockImporter()
    org_importer = OrganizationImporter("jid")
    bill_importer = BillImporter("jid", mock_importer, org_importer)

    def import_outcome():
        # A brand-new importer is used for every import, as a separate run would.
        importer = VoteEventImporter(
            "jid", mock_importer, org_importer, bill_importer
        )
        _, outcome = importer.import_item(scraped.as_dict())
        return outcome

    # first sighting -> insert
    assert import_outcome() == "insert"
    assert VoteEvent.objects.count() == 1

    # identical data -> nothing to do
    assert import_outcome() == "noop"
    assert VoteEvent.objects.count() == 1

    # changed result, same pupa_id -> update in place
    scraped.result = "failed"
    assert import_outcome() == "update"
    assert VoteEvent.objects.count() == 1

    # changed vote event identifier, same pupa_id -> still an update
    scraped.identifier = "First Roll Call"
    assert import_outcome() == "update"
    assert VoteEvent.objects.count() == 1

    # changed pupa_id -> treated as a different vote event
    scraped.pupa_id = "bar"
    assert import_outcome() == "insert"
    assert VoteEvent.objects.count() == 2
def _parse_votes(self, url, vote, bill):
    """Given a vote url and a vote dict, extract the voters and the vote
    counts from the vote page and build a ``VoteEvent`` from them.

    Args:
        url: Location of the vote document. URLs ending in ``.pdf`` are
            handed to ``PDFCommitteeVote``; anything else is parsed as HTML.
        vote: Dict describing the vote. The keys read here are ``action``,
            ``vote_url``, ``chamber`` and ``date``; ``motion`` is written
            back into it.
        bill: Passed through to ``PDFCommitteeVote`` / ``VoteEvent``.

    Returns:
        The vote event, or ``None`` when the document is missing, cannot be
        parsed, or the page lacks the vote-count table.

    Raises:
        Exception: If the action text contains both passage and failure
            indicators while the yeas outnumber the nays, or if no
            indicator at all matches the action text.
    """
    if url.lower().endswith(".pdf"):
        try:
            resp = self.get(url)
        except HTTPError:
            # This vote document wasn't found.
            msg = "No document found at url %r" % url
            self.logger.warning(msg)
            return

        try:
            v = PDFCommitteeVote(url, resp.content, bill)
            return v.asvote()
        except PDFCommitteeVoteParseError:
            # Warn and skip.
            self.warning("Could't parse committee vote at %r" % url)
            return

    html = self.get(url).text
    doc = lxml.html.fromstring(html)
    doc.make_links_absolute(url)

    # Yes, no, excused, absent.
    try:
        vals = doc.xpath("//table")[1].xpath("tr/td/text()")
    except IndexError:
        # Most likely was a bogus link lacking vote data.
        return

    yes_count, no_count, excused_count, absent_count = map(int, vals)

    # Get the motion: the text trailing the last <br>. Some pages
    # mysteriously have no motion listed (no <br> at all, or a <br> with no
    # trailing text, in which case ``tail`` is None); fall back to the
    # action text for those.
    line_breaks = doc.xpath("//br")
    motion = (line_breaks[-1].tail or "").strip() if line_breaks else ""
    if not motion:
        motion = vote["action"]

    vote["motion"] = motion
    action = vote["action"]
    vote_url = vote["vote_url"]

    vote_event = VoteEvent(
        chamber=vote["chamber"],
        start_date=vote["date"],
        motion_text=vote["motion"],
        result="fail",  # placeholder
        classification="passage",
        bill=bill,
        bill_action=vote["action"],
    )
    vote_event.dedupe_key = vote_url  # URL contains sequence number
    vote_event.add_source(vote_url)
    vote_event.set_count("yes", yes_count)
    vote_event.set_count("no", no_count)
    vote_event.set_count("excused", excused_count)
    vote_event.set_count("absent", absent_count)

    # Compiled once up front instead of once per voter row.
    short_name_re = re.compile(r".*?\((.*?)\)")

    for text in doc.xpath("//table")[2].xpath("tr/td/text()"):
        if not text.strip("\xa0"):
            continue

        # Each cell is "<code>\xa0...\xa0<name>"; dropping the empty pieces
        # leaves exactly the vote code and the voter name.
        v, name = filter(None, text.split("\xa0"))

        # Considering Name is brackets as short name
        short_name = short_name_re.findall(name)
        note = "Short Name: " + short_name[0] if short_name else ""

        # Name without brackets like 'Kary, Douglas'
        name = re.sub(r"[\(\[].*?[\)\]]", "", name)

        if v == "Y":
            vote_event.yes(name, note=note)
        elif v == "N":
            vote_event.no(name, note=note)
        elif v == "E":
            vote_event.vote("excused", name, note=note)
        elif v == "A":
            vote_event.vote("absent", name, note=note)

    # code to determine value of `passed`
    # some actions take a super majority, so we aren't just
    # comparing the yeas and nays here.
    passed = None
    if any(i in action for i in vote_passage_indicators):
        passed = True

    if any(i in action for i in vote_failure_indicators):
        if passed:
            # a quick explanation: originally an exception was
            # thrown if both passage and failure indicators were
            # present because I thought that would be a bug in my
            # lists. Then I found 2007 HB 160.
            # Now passed = False if the nays outnumber the yays..
            # I won't automatically mark it as passed if the yays
            # outnumber the nays because I don't know what requires
            # a supermajority in MT.
            if no_count >= yes_count:
                passed = False
            else:
                raise Exception(
                    "passage and failure indicator both present at: %s" % url
                )
        else:
            passed = False

    # An ambiguous indicator overrides the above with a plain majority test.
    if any(i in action for i in vote_ambiguous_indicators):
        passed = yes_count > no_count

    if passed is None:
        raise Exception("Unknown passage at: %s" % url)

    vote_event.result = "pass" if passed else "fail"

    return vote_event