def scrape_chamber(self, chamber, session):
    """Yield the bills of one chamber for ``session`` from the bill_status API.

    The whole bill list is fetched from kslegislature.org in one request and
    then filtered down to ``chamber`` by the first letter of each bill number.
    For every bill kept, whatever ``self.scrape_html`` yields is emitted
    first, followed by the ``Bill`` itself.

    Args:
        chamber: ``"upper"`` selects bill numbers starting with "S"; any
            other value selects those starting with "H".
        session: Identifier matched against
            ``self.jurisdiction.legislative_sessions``.
    """
    # Pull the session metadata so we can get the
    # slug for the API Request
    meta = next(
        each
        for each in self.jurisdiction.legislative_sessions
        if each["identifier"] == session
    )
    if meta["classification"] == "special":
        list_slug = self.special_slugs[session]
    else:
        list_slug = "li"

    list_url = "http://www.kslegislature.org/{}/api/v11/rev-1/bill_status".format(
        list_slug
    )
    chamber_name = "Senate" if chamber == "upper" else "House"
    chamber_letter = chamber_name[0]

    # perhaps we should save this data so we can make one request for both?
    bill_request = self.get(list_url).text
    bill_request_json = json.loads(bill_request)
    bills = bill_request_json["content"]

    # there are duplicates
    seen_ids = set()
    for bill_data in bills:
        bill_id = bill_data["BILLNO"]

        # filter other chambers
        if not bill_id.startswith(chamber_letter):
            continue
        # filter duplicates
        if bill_id in seen_ids:
            continue
        seen_ids.add(bill_id)

        # "CR" must be tested before "R": every concurrent-resolution id
        # also contains an "R".
        if "CR" in bill_id:
            btype = "concurrent resolution"
        elif "R" in bill_id:
            btype = "resolution"
        elif "B" in bill_id:
            btype = "bill"
        else:
            # Without this branch ``btype`` would be unbound on the first
            # bill, or silently carry over the previous bill's type.
            self.warning("unknown bill type for %s, skipping" % bill_id)
            continue

        long_title = bill_data["LONGTITLE"]
        title = bill_data["SHORTTITLE"] or long_title

        # main
        bill = Bill(bill_id, session, title, chamber=chamber, classification=btype)
        bill.extras = {"status": bill_data["STATUS"]}
        bill.add_source(ksapi.url + "bill_status/" + bill_id.lower())

        # Record the long title as an alternate only when it adds something.
        if long_title and long_title != bill.title:
            bill.add_title(long_title)

        # An "original sponsor" is the API's expression of "primary sponsor"
        original_sponsors = bill_data["ORIGINAL_SPONSOR"]
        for primary_sponsor in original_sponsors:
            primary_sponsor = self.clean_sponsor_name(primary_sponsor)
            bill.add_sponsorship(
                name=primary_sponsor,
                entity_type="organization"
                if "committee" in primary_sponsor.lower()
                else "person",
                primary=True,
                classification="original sponsor",
            )

        for sponsor in bill_data["SPONSOR_NAMES"]:
            # Membership is checked on the raw (uncleaned) name, i.e. the
            # form in which it appears in ORIGINAL_SPONSOR.
            if sponsor in original_sponsors:
                continue
            sponsor = self.clean_sponsor_name(sponsor)
            bill.add_sponsorship(
                name=sponsor,
                entity_type="organization"
                if "committee" in sponsor.lower()
                else "person",
                primary=False,
                classification="cosponsor",
            )

        # history is backwards
        for event in reversed(bill_data["HISTORY"]):
            actor = "upper" if event["chamber"] == "Senate" else "lower"
            date = event["session_date"]

            # append committee names if present
            if "committee_names" in event:
                action = event["status"] + " " + " and ".join(event["committee_names"])
            else:
                action = event["status"]

            if event["action_code"] not in ksapi.action_codes:
                self.warning(
                    "unknown action code on %s: %s %s"
                    % (bill_id, event["action_code"], event["status"])
                )
                atype = None
            else:
                atype = ksapi.action_codes[event["action_code"]]
            bill.add_action(action, date, chamber=actor, classification=atype)

        # Versions are exposed in `bill_data['versions'],
        # but lack any descriptive text or identifiers;
        # continue to scrape these from the HTML
        yield from self.scrape_html(bill, session)

        yield bill
def scrape(self, session=None, chamber=None):
    """Yield ``VoteEvent`` and ``Bill`` objects for every instrument in a session.

    The legislation index for the session is fetched through
    ``self.lservice``; each entry is then expanded with a detail call and,
    for every vote it lists, a ``self.vservice.GetVote`` call. Votes of a
    bill are yielded before the bill itself.

    Args:
        session: Key into ``SESSION_SITE_IDS``. When falsy,
            ``self.latest_session()`` is used.
        chamber: Accepted for interface compatibility; not used here.
    """
    bill_type_map = {
        "B": "bill",
        "R": "resolution",
        "JR": "joint resolution",
        "CR": "concurrent resolution",
    }

    chamber_map = {
        "H": "lower",
        "S": "upper",
        "J": "joint",
        "E": "legislature",  # Effective date
    }

    action_code_map = {
        "HI": None,
        "SI": None,
        "HH": None,
        "SH": None,
        "HPF": ["introduction"],
        "HDSAS": None,
        "SPF": ["introduction"],
        "HSR": ["reading-2"],
        "SSR": ["reading-2"],
        "HFR": ["reading-1"],
        "SFR": ["reading-1"],
        "HRECM": ["withdrawal", "referral-committee"],
        "SRECM": ["withdrawal", "referral-committee"],
        "SW&C": ["withdrawal", "referral-committee"],
        "HW&C": ["withdrawal", "referral-committee"],
        "HRA": ["passage"],
        "SRA": ["passage"],
        "HPA": ["passage"],
        "HRECO": None,
        "SPA": ["passage"],
        "HTABL": None,  # 'House Tabled' - what is this?
        "SDHAS": None,
        "HCFR": ["committee-passage-favorable"],
        "SCFR": ["committee-passage-favorable"],
        "HRAR": ["referral-committee"],
        "SRAR": ["referral-committee"],
        "STR": ["reading-3"],
        "SAHAS": None,
        "SE": ["passage"],
        "SR": ["referral-committee"],
        "HTRL": ["reading-3", "failure"],
        "HTR": ["reading-3"],
        "S3RLT": ["reading-3", "failure"],
        "HASAS": None,
        "S3RPP": None,
        "STAB": None,
        "SRECO": None,
        "SAPPT": None,
        "HCA": None,
        "HNOM": None,
        "HTT": None,
        "STT": None,
        "SRECP": None,
        "SCRA": None,
        "SNOM": None,
        "S2R": ["reading-2"],
        "H2R": ["reading-2"],
        "SENG": ["passage"],
        "HENG": ["passage"],
        "HPOST": None,
        "HCAP": None,
        "SDSG": ["executive-signature"],
        "SSG": ["executive-receipt"],
        "Signed Gov": ["executive-signature"],
        "HDSG": ["executive-signature"],
        "HSG": ["executive-receipt"],
        "EFF": None,
        "HRP": None,
        "STH": None,
        "HTS": None,
    }

    # Hoisted: these were rebuilt for every vote / committee.
    branch_map = {"House": "lower", "Senate": "upper"}
    vote_methods = {"Yea": "yes", "Nay": "no"}

    if not session:
        session = self.latest_session()
        self.info("no session specified, using %s", session)
    sid = SESSION_SITE_IDS[session]

    legislation = backoff(self.lservice.GetLegislationForSession, sid)[
        "LegislationIndex"
    ]

    for leg in legislation:
        lid = leg["Id"]
        instrument = backoff(self.lservice.GetLegislationDetail, lid)

        # Instruments without a caption are skipped; check first so no
        # further parsing is attempted for them.
        title = instrument["Caption"]
        if title is None:
            continue
        description = instrument["Summary"]

        # Copied so it can be walked in reverse below.
        history = list(instrument["StatusHistory"][0])

        guid = instrument["Id"]

        # A little bit hacky.
        bill_prefix = instrument["DocumentType"]
        bill_chamber = chamber_map[bill_prefix[0]]
        bill_type = bill_type_map[bill_prefix[1:]]

        bill_id = "%s %s" % (bill_prefix, instrument["Number"])
        if instrument["Suffix"]:
            bill_id += instrument["Suffix"]

        bill = Bill(
            bill_id,
            legislative_session=session,
            chamber=bill_chamber,
            title=title,
            classification=bill_type,
        )
        bill.add_abstract(description, note="description")
        bill.extras = {"guid": guid}

        if instrument["Votes"]:
            # Each entry is a pair; only the second element is used, and
            # its first item carries the VoteId to look up.
            for _, vote_refs in instrument["Votes"]:
                vote_ = backoff(self.vservice.GetVote, vote_refs[0]["VoteId"])

                vote = VoteEvent(
                    start_date=vote_["Date"].strftime("%Y-%m-%d"),
                    motion_text=vote_["Caption"] or "Vote on Bill",
                    chamber=branch_map[vote_["Branch"]],
                    result="pass" if vote_["Yeas"] > vote_["Nays"] else "fail",
                    classification="passage",
                    bill=bill,
                )
                vote.set_count("yes", vote_["Yeas"])
                vote.set_count("no", vote_["Nays"])
                vote.set_count("other", vote_["Excused"] + vote_["NotVoting"])

                vote.add_source(self.vsource)

                if vote_["Votes"] is not None:
                    for vdetail in vote_["Votes"][0]:
                        whom = vdetail["Member"]
                        how = vdetail["MemberVoted"]
                        if whom["Name"] == "VACANT":
                            continue
                        name, district = vote_name_pattern.search(
                            whom["Name"]
                        ).groups()
                        # Anything other than Yea/Nay is recorded as "other".
                        vote.vote(vote_methods.get(how, "other"), name, note=district)

                yield vote

        # Committee names grouped by chamber, for attaching to actions.
        ccommittees = defaultdict(list)
        committees = instrument["Committees"]
        if committees:
            for committee in committees[0]:
                ccommittees[branch_map[committee["Type"]]].append(committee["Name"])

        # The status history is walked last-to-first.
        for status in reversed(history):
            code = status["Code"]
            action_text = status["Description"]

            # The first letter of the code selects the chamber.
            action_chamber = chamber_map[code[0]]

            try:
                action_types = action_code_map[code]
            except KeyError:
                error_msg = "Code {code} for action {action} not recognized.".format(
                    code=code, action=action_text
                )
                self.logger.warning(error_msg)
                action_types = None

            related_committees = []
            if action_types and any("committee" in x for x in action_types):
                related_committees = [
                    str(x) for x in ccommittees.get(action_chamber, [])
                ]

            act = bill.add_action(
                action_text,
                status["Date"].strftime("%Y-%m-%d"),
                classification=action_types,
                chamber=action_chamber,
            )
            for committee in related_committees:
                act.add_related_entity(committee, "organization")
            act.extras = {"code": code, "guid": status["Id"]}

        # Copy the authors list: extending it directly would mutate the
        # service response object in place.
        sponsors = []
        if instrument["Authors"]:
            sponsors = list(instrument["Authors"]["Sponsorship"])
        if "Sponsors" in instrument and instrument["Sponsors"]:
            sponsors.extend(instrument["Sponsors"]["Sponsorship"])

        for entry in sponsors:
            typ = entry["Type"]
            sponsor = self.get_member(entry["MemberId"])
            is_author = "Author" in typ
            name = "{First} {Last}".format(**dict(sponsor["Name"]))
            bill.add_sponsorship(
                name,
                entity_type="person",
                classification="primary" if is_author else "secondary",
                primary=is_author,
            )

        for version in instrument["Versions"]["DocumentDescription"]:
            link = bill.add_version_link(
                version["Description"], version["Url"], media_type="application/pdf"
            )
            link["extras"] = {
                "_internal_document_id": version["Id"],
                "_version_id": version["Version"],
            }

        bill.add_source(self.msource)
        bill.add_source(self.lsource)
        bill.add_source(SOURCE_URL.format(session=session, bid=guid))

        yield bill