def run(options):
    """Fetch the amendments selected by ``options`` and save their search state.

    Selection, in priority order:

    * ``amendment_id`` -- fetch only that amendment.
    * ``bill_id`` -- crawl the bill first via ``fetch_bill``, then fetch every
      amendment listed under ``"amendments"`` in the bill's saved JSON.
    * otherwise -- ask ``bill_ids_for`` for the amendments of ``congress``
      (default ``utils.current_congress()``), truncated to ``limit`` if set.

    When ``pages_only`` is set, nothing is fetched. Always returns None.
    """
    amendment_id = options.get('amendment_id', None)
    bill_id = options.get('bill_id', None)

    # Filled in by bill_ids_for and handed to save_bill_search_state below.
    search_state = {}

    if amendment_id:
        _, _, congress = utils.split_bill_id(amendment_id)
        to_fetch = [amendment_id]

    elif bill_id:
        # first, crawl the bill
        _, _, congress = utils.split_bill_id(bill_id)
        bill_status = fetch_bill(bill_id, options)
        if not bill_status['ok']:
            logging.error("Couldn't download information for that bill.")
            return None
        bill = json.loads(utils.read(output_for_bill(bill_id, "json")))
        to_fetch = [x["amendment_id"] for x in bill["amendments"]]

    else:
        # NOTE(review): SOURCE arrived with its indentation collapsed; the
        # empty-result check and ``limit`` are placed in this branch -- confirm.
        congress = options.get('congress', utils.current_congress())
        to_fetch = bill_ids_for(congress, utils.merge(options, {'amendments': True}), bill_states=search_state)
        if not to_fetch:
            if options.get("fast", False):
                # logging.warn is a deprecated alias of logging.warning
                logging.warning("No amendments changed.")
            else:
                logging.error("Error figuring out which amendments to download, aborting.")
            return None

        limit = options.get('limit', None)
        if limit:
            to_fetch = to_fetch[:int(limit)]

    if options.get('pages_only', False):
        return None

    logging.warning("Going to fetch %i amendments from congress #%s", len(to_fetch), congress)
    saved_amendments = utils.process_set(to_fetch, fetch_amendment, options)

    # keep record of the last state of all these amendments, for later fast-searching
    save_bill_search_state(saved_amendments, search_state)
def run(options):
    """Fetch bill versions selected by ``options``.

    ``bill_id`` or ``bill_version_id`` determines the congress; otherwise the
    ``congress`` option (default ``utils.current_congress()``) is used. A
    ``bill_version_id`` is fetched on its own; in every other case the list
    comes from ``bill_version_ids_for(congress, options)``. ``limit``
    truncates the list. Returns None.
    """
    bill_id = options.get('bill_id', None)
    bill_version_id = options.get('bill_version_id', None)

    # using a specific bill or version overrides the congress flag/default
    if bill_id:
        _, _, congress = utils.split_bill_id(bill_id)
    elif bill_version_id:
        _, _, congress, _ = utils.split_bill_version_id(bill_version_id)
    else:
        congress = options.get('congress', utils.current_congress())

    if bill_version_id:
        to_fetch = [bill_version_id]
    else:
        to_fetch = bill_version_ids_for(congress, options)
        if not to_fetch:
            logging.error("Error figuring out which bills to download, aborting.")
            return None

    # NOTE(review): SOURCE arrived with its indentation collapsed; ``limit`` is
    # applied to both paths here -- confirm against the original layout.
    limit = options.get('limit', None)
    if limit:
        to_fetch = to_fetch[:int(limit)]

    # logging.warn is a deprecated alias of logging.warning
    logging.warning("Going to fetch %i bill versions for congress #%s", len(to_fetch), congress)

    utils.process_set(to_fetch, fetch_version, options)
def amendment_url_for(amendment_id):
    """Return the THOMAS bdquery URL for ``amendment_id``.

    The id is split with ``utils.split_bill_id``; the type is mapped through
    the first entry of ``utils.thomas_types`` and the congress is zero-padded
    to three digits.
    """
    kind, raw_number, raw_congress = utils.split_bill_id(amendment_id)
    thomas_code = utils.thomas_types[kind][0]
    url_parts = (int(raw_congress), thomas_code, int(raw_number))
    return "http://thomas.loc.gov/cgi-bin/bdquery/z?d%03d:%s%s:" % url_parts
def output_for_bill(bill_id, format, is_data_dot=True):
    """Return the output path for a bill's data file.

    With ``is_data_dot`` true the file name is ``data.<format>``; otherwise
    ``format`` is used as the file name unchanged.
    """
    bill_type, number, congress = utils.split_bill_id(bill_id)
    filename = ("data.%s" % format) if is_data_dot else format
    path_parts = (utils.data_dir(), congress, bill_type, bill_type, number, filename)
    return "%s/%s/bills/%s/%s%s/%s" % path_parts
def run(options):
    """Fetch the bills selected by ``options`` and save their search state.

    With ``bill_id`` only that bill is fetched. Otherwise the list comes from
    ``bill_ids_for`` for ``congress`` (default ``utils.current_congress()``),
    truncated to ``limit`` if set. Returns None.
    """
    bill_id = options.get('bill_id', None)

    # Filled in by bill_ids_for and handed to save_bill_search_state below.
    search_state = {}

    if bill_id:
        _, _, congress = utils.split_bill_id(bill_id)
        to_fetch = [bill_id]
    else:
        # NOTE(review): SOURCE arrived with its indentation collapsed; the
        # empty-result check and ``limit`` are placed in this branch -- confirm.
        congress = options.get('congress', utils.current_congress())
        to_fetch = bill_ids_for(congress, options, bill_states=search_state)

        if not to_fetch:
            if options.get("fast", False):
                # logging.warn is a deprecated alias of logging.warning
                logging.warning("No bills changed.")
            else:
                logging.error("Error figuring out which bills to download, aborting.")
            return None

        limit = options.get('limit', None)
        if limit:
            to_fetch = to_fetch[:int(limit)]

    logging.warning("Going to fetch %i bills from congress #%s", len(to_fetch), congress)

    saved_bills = utils.process_set(to_fetch, bill_info.fetch_bill, options)

    save_bill_search_state(saved_bills, search_state)
def run(options):
    """Write Cato XML for the bill versions selected by ``options``.

    The filter narrows from most to least specific: a ``bill_version_id``
    fixes type, number, congress and version code; a ``bill_id`` fixes type,
    number and congress; otherwise only ``congress`` is set (default
    ``utils.current_congress()``). Returns None.
    """
    requested_version = options.get("bill_version_id", None)

    # Least specific defaults; the branches below narrow them.
    version_code = None
    bill_type = None
    bill_number = None

    if requested_version:
        bill_type, bill_number, congress, version_code = utils.split_bill_version_id(requested_version)
        # Result is not used further in this function.
        bill_id = utils.build_bill_id(bill_type, bill_number, congress)
    else:
        bill_id = options.get("bill_id", None)
        if bill_id:
            bill_type, bill_number, congress = utils.split_bill_id(bill_id)
        else:
            congress = options.get("congress", utils.current_congress())

    force = options.get("force", False)
    candidates = bill_version_ids_for(congress, bill_type, bill_number, version_code, force)

    if candidates:
        utils.process_set(candidates, write_bill_catoxml, options)
    return None
def run(options):
    """Fetch the amendments selected by ``options`` and save their search state.

    With ``amendment_id`` only that amendment is fetched. Otherwise the list
    comes from ``bill_ids_for`` (called with ``amendments`` forced to True)
    for ``congress`` (default ``utils.current_congress()``), truncated to
    ``limit`` if set. When ``pages_only`` is set, nothing is fetched.
    Returns None.
    """
    amdt_id = options.get('amendment_id', None)

    # Filled in by bill_ids_for and handed to save_bill_search_state below.
    search_state = {}

    if amdt_id:
        _, _, congress = utils.split_bill_id(amdt_id)
        to_fetch = [amdt_id]
    else:
        # NOTE(review): SOURCE arrived with its indentation collapsed; the
        # empty-result check and ``limit`` are placed in this branch -- confirm.
        congress = options.get('congress', utils.current_congress())
        to_fetch = bill_ids_for(congress, utils.merge(options, {'amendments': True}), bill_states=search_state)
        if not to_fetch:
            if options.get("fast", False):
                # logging.warn is a deprecated alias of logging.warning
                logging.warning("No amendments changed.")
            else:
                logging.error("Error figuring out which amendments to download, aborting.")
            return None

        limit = options.get('limit', None)
        if limit:
            to_fetch = to_fetch[:int(limit)]

    if options.get('pages_only', False):
        return None

    logging.warning("Going to fetch %i amendments from congress #%s", len(to_fetch), congress)

    saved_amendments = utils.process_set(to_fetch, fetch_amendment, options)

    save_bill_search_state(saved_amendments, search_state)
def run(options):
    """Fetch the bills selected by ``options`` and save their search state.

    A ``bill_id`` option selects that single bill. Without it, the candidates
    come from ``bill_ids_for`` for ``congress`` (default
    ``utils.current_congress()``) and are cut to ``limit`` entries if that
    option is set. Returns None.
    """
    bill_id = options.get('bill_id', None)

    # Filled in by bill_ids_for and handed to save_bill_search_state below.
    search_state = {}

    if bill_id:
        _, _, congress = utils.split_bill_id(bill_id)
        to_fetch = [bill_id]
    else:
        # NOTE(review): SOURCE arrived with its indentation collapsed; the
        # empty-result check and ``limit`` are placed in this branch -- confirm.
        congress = options.get('congress', utils.current_congress())
        to_fetch = bill_ids_for(congress, options, bill_states=search_state)

        if not to_fetch:
            if options.get("fast", False):
                # logging.warn is a deprecated alias of logging.warning
                logging.warning("No bills changed.")
            else:
                logging.error("Error figuring out which bills to download, aborting.")
            return None

        limit = options.get('limit', None)
        if limit:
            to_fetch = to_fetch[:int(limit)]

    logging.warning("Going to fetch %i bills from congress #%s", len(to_fetch), congress)

    saved_bills = utils.process_set(to_fetch, bill_info.fetch_bill, options)

    save_bill_search_state(saved_bills, search_state)
def run(options):
    """Fetch the amendments selected by ``options`` and save their search state.

    * ``amendment_id`` -- fetch only that amendment.
    * ``bill_id`` -- crawl the bill via ``fetch_bill``, then fetch each
      amendment listed under ``"amendments"`` in the bill's saved JSON.
    * neither -- use ``bill_ids_for`` (with ``amendments`` forced to True) for
      ``congress`` (default ``utils.current_congress()``), cut to ``limit``.

    When ``pages_only`` is set, nothing is fetched. Always returns None.
    """
    amendment_id = options.get('amendment_id', None)
    bill_id = options.get('bill_id', None)

    # Filled in by bill_ids_for and handed to save_bill_search_state below.
    search_state = {}

    if amendment_id:
        _, _, congress = utils.split_bill_id(amendment_id)
        to_fetch = [amendment_id]

    elif bill_id:
        # first, crawl the bill
        _, _, congress = utils.split_bill_id(bill_id)
        bill_status = fetch_bill(bill_id, options)
        if not bill_status['ok']:
            logging.error("Couldn't download information for that bill.")
            return None
        bill = json.loads(utils.read(output_for_bill(bill_id, "json")))
        to_fetch = [x["amendment_id"] for x in bill["amendments"]]

    else:
        # NOTE(review): SOURCE arrived with its indentation collapsed; the
        # empty-result check and ``limit`` are placed in this branch -- confirm.
        congress = options.get('congress', utils.current_congress())
        to_fetch = bill_ids_for(congress, utils.merge(options, {'amendments': True}), bill_states=search_state)
        if not to_fetch:
            if options.get("fast", False):
                # logging.warn is a deprecated alias of logging.warning
                logging.warning("No amendments changed.")
            else:
                logging.error("Error figuring out which amendments to download, aborting.")
            return None

        limit = options.get('limit', None)
        if limit:
            to_fetch = to_fetch[:int(limit)]

    if options.get('pages_only', False):
        return None

    logging.warning("Going to fetch %i amendments from congress #%s", len(to_fetch), congress)
    saved_amendments = utils.process_set(to_fetch, fetch_amendment, options)

    # keep record of the last state of all these amendments, for later fast-searching
    save_bill_search_state(saved_amendments, search_state)
def fetch_amendment(amdt_id, options):
    """Download, parse and write out a single amendment.

    Returns a dict with ``ok`` and ``saved`` flags; a ``reason`` key is
    included whenever nothing was saved (download failure, or the
    ``download_only`` option).
    """
    logging.info("\n[%s] Fetching..." % amdt_id)

    # the amendment's information page
    page = utils.download(
        amdt_url_for(amdt_id),
        amdt_cache_for(amdt_id, "information.html"),
        options)

    if not page:
        return {'saved': False, 'ok': False, 'reason': "failed to download"}

    if options.get("download_only", False):
        return {'saved': False, 'ok': True, 'reason': "requested download only"}

    amdt_type, number, congress = utils.split_bill_id(amdt_id)

    parsed_actions = actions_for(page, amdt_id, is_amendment=True)
    actions = [] if parsed_actions is None else parsed_actions
    parse_amendment_actions(actions)

    # Fields are added in the same order the helpers were originally called.
    amdt = {}
    amdt['amendment_id'] = amdt_id
    amdt['amendment_type'] = amdt_type
    amdt['chamber'] = amdt_type[0]
    amdt['number'] = number
    amdt['congress'] = congress

    amdt['amends'] = amends_for(page, grab_bill=False)
    amdt['amends_bill'] = amends_for(page, grab_bill=True)
    amdt['house_number'] = house_number_for(page)

    for key, label in (('offered_at', 'offered'),
                       ('submitted_at', 'submitted'),
                       ('proposed_at', 'proposed')):
        amdt[key] = offered_at_for(page, label)

    amdt['sponsor'] = sponsor_for(page)

    for field in ("title", "description", "purpose"):
        amdt[field] = amendment_simple_text_for(page, field)

    amdt['actions'] = actions
    amdt['updated_at'] = datetime.datetime.fromtimestamp(time.time())

    set_amendment_status(amdt)
    output_amendment(amdt, options)

    return {'ok': True, 'saved': True}
def run(options):
    """Fetch the bills selected by ``options`` one at a time, logging outcomes.

    With ``bill_id`` only that bill is fetched. Otherwise the list comes from
    ``bill_ids_for`` for ``congress`` (default ``utils.current_congress()``),
    truncated to ``limit`` if set. When ``pages_only`` is set, nothing is
    fetched.

    An exception from ``bill_info.fetch_bill`` is re-raised only when the
    ``raise`` option is set; otherwise it is recorded and the loop continues.
    Returns None.
    """
    bill_id = options.get('bill_id', None)

    if bill_id:
        _, _, congress = utils.split_bill_id(bill_id)
        to_fetch = [bill_id]
    else:
        # NOTE(review): SOURCE arrived with its indentation collapsed; the
        # empty-result check and ``limit`` are placed in this branch -- confirm.
        congress = options.get('congress', utils.current_congress())
        to_fetch = bill_ids_for(congress, options)
        if not to_fetch:
            logging.error("Error figuring out which bills to download, aborting.")
            return None

        limit = options.get('limit', None)
        if limit:
            to_fetch = to_fetch[:int(limit)]

    if options.get('pages_only', False):
        return None

    # Single-argument call form prints the same text on Python 2 and 3.
    print("Going to fetch %i bills from congress #%s" % (len(to_fetch), congress))

    # Outcome buckets; the visible code only fills them, it does not return them.
    errors = []
    saved = []
    skips = []

    for current_id in to_fetch:
        try:
            results = bill_info.fetch_bill(current_id, options)
        except Exception as e:  # best-effort: one bad bill must not stop the run
            if options.get('raise', False):
                raise
            errors.append((current_id, e))
            continue

        # .get() so a result without 'reason' cannot raise while reporting
        reason = results.get('reason')

        if results.get('ok', False):
            if results.get('saved', False):
                saved.append(current_id)
                logging.info("[%s] Updated bill" % current_id)
            else:
                skips.append(current_id)
                logging.error("[%s] Skipping bill: %s" % (current_id, reason))
        else:
            errors.append((current_id, results))
            logging.error("[%s] Error: %s" % (current_id, reason))
def run(options):
    """Process the bills selected by ``options`` with ``process_bill``.

    With ``bill_id`` only that bill is processed. Otherwise the list comes
    from ``get_bills_to_process(options)``, truncated to ``limit`` if set.
    Returns None.
    """
    bill_id = options.get('bill_id', None)

    if bill_id:
        # Result unused; the call is kept so the id is still split as before.
        utils.split_bill_id(bill_id)
        to_fetch = [bill_id]
    else:
        # NOTE(review): SOURCE arrived with its indentation collapsed; the
        # empty-result check and ``limit`` are placed in this branch -- confirm.
        to_fetch = get_bills_to_process(options)

        if not to_fetch:
            # logging.warn is a deprecated alias of logging.warning
            logging.warning("No bills changed.")
            return None

        limit = options.get('limit', None)
        if limit:
            to_fetch = to_fetch[:int(limit)]

    utils.process_set(to_fetch, process_bill, options)
def run(options):
    """Write Cato XML for the bill versions selected by ``options``.

    A ``bill_version_id`` is processed on its own; otherwise the list comes
    from ``bill_version_ids_for`` for the congress taken from ``bill_id`` or
    the ``congress`` option (default ``utils.current_congress()``).
    Returns None.
    """
    bill_id = options.get('bill_id', None)
    version_id = options.get('bill_version_id', None)

    # A specific bill or version takes precedence over the congress flag/default.
    if bill_id:
        _, _, congress = utils.split_bill_id(bill_id)
    elif version_id:
        _, _, congress, _ = utils.split_bill_version_id(version_id)
    else:
        congress = options.get('congress', utils.current_congress())

    # A one-element list is never empty, so the check below only fires for lookups.
    targets = [version_id] if version_id else bill_version_ids_for(congress, options)
    if not targets:
        logging.error("Error figuring out which bills to download, aborting.")
        return None

    utils.process_set(targets, write_bill_catoxml, options)
def run(options):
    """Run ``write_bill_catoxml`` over the bill versions named by ``options``.

    ``bill_version_id`` selects a single version. Without it, versions are
    looked up with ``bill_version_ids_for`` using the congress from
    ``bill_id`` or the ``congress`` option (default
    ``utils.current_congress()``). Returns None.
    """
    selected_bill = options.get('bill_id', None)
    selected_version = options.get('bill_version_id', None)

    # An explicit bill or version overrides the congress flag/default.
    if selected_bill:
        congress = utils.split_bill_id(selected_bill)[2]
    elif selected_version:
        congress = utils.split_bill_version_id(selected_version)[2]
    else:
        congress = options.get('congress', utils.current_congress())

    if selected_version:
        work_list = [selected_version]
    else:
        work_list = bill_version_ids_for(congress, options)
        if not work_list:
            logging.error("Error figuring out which bills to download, aborting.")
            return None

    utils.process_set(work_list, write_bill_catoxml, options)
def run(options):
    """Write Cato XML for every bill version matching ``options``.

    ``bill_version_id`` pins type, number, congress and version code;
    ``bill_id`` pins type, number and congress; otherwise only the congress
    is set, from the ``congress`` option (default
    ``utils.current_congress()``). The ``force`` option is passed through to
    ``bill_version_ids_for``. Returns None.
    """
    version_id = options.get("bill_version_id", None)

    if version_id:
        pieces = utils.split_bill_version_id(version_id)
        bill_type, bill_number, congress, version_code = pieces
        # Result is not used further in this function.
        bill_id = utils.build_bill_id(bill_type, bill_number, congress)
    else:
        version_code = None
        bill_id = options.get("bill_id", None)
        if not bill_id:
            bill_type = None
            bill_number = None
            congress = options.get("congress", utils.current_congress())
        else:
            bill_type, bill_number, congress = utils.split_bill_id(bill_id)

    force = options.get("force", False)
    version_ids = bill_version_ids_for(congress, bill_type, bill_number, version_code, force)

    if not version_ids:
        return None

    utils.process_set(version_ids, write_bill_catoxml, options)
def parse_bill_action(action_dict, prev_status, bill_id, title):
    """Parse a THOMAS bill action line.

    Returns attributes to be set in the XML file on the action line.

    Args:
        action_dict: dict whose ``'text'`` entry is the action line.
        prev_status: the bill's status before this action.
        bill_id: the bill's id, split with ``utils.split_bill_id``.
        title: passed through to ``new_status_after_vote``.

    Returns:
        ``(action, status)``: ``action`` is a dict starting as
        ``{"type": "action"}`` and refined by whatever patterns match;
        ``status`` is the new bill status or None if unchanged. Returns
        ``(None, None)`` when the line starts with an amendment number.

    Raises:
        Exception: for an agreed-to motion to table that cannot be classified
            by chamber, or an enactment line with no preceding signature
            status and no hard-coded ten-day-rule bill id.
    """
    # NOTE(review): SOURCE arrived with its line structure collapsed; the
    # indentation below is reconstructed -- confirm against the original.

    bill_type, _number, congress = utils.split_bill_id(bill_id)

    line = action_dict['text']

    status = None
    action = {
        "type": "action"
    }

    # If a line starts with an amendment number, this action is on the amendment and cannot
    # be parsed yet.
    m = re.search(r"^(H|S)\.Amdt\.(\d+)", line, re.I)
    if m is not None:
        # Process actions specific to amendments separately.
        return None, None

    # Otherwise, parse the action line for key actions.

    # VOTES

    # A House Vote.
    line = re.sub(", the Passed", ", Passed", line)  # 106 h4733 and others
    m = re.search(
        "("
        + "|".join([
            "On passage",
            "Passed House",
            "Two-thirds of the Members present having voted in the affirmative the bill is passed,?",
            "On motion to suspend the rules and pass the (?:bill|resolution)",
            "On agreeing to the (?:resolution|conference report)",
            "On motion to suspend the rules and agree to the (?:resolution|conference report)",
            "House Agreed to Senate Amendments.*?",
            "On motion (?:that )?the House (?:suspend the rules and )?(?:agree(?: with an amendment)? to|concur in) the Senate amendments?(?: to the House amendments?| to the Senate amendments?)*",
        ])
        + ")"
        + "(, the objections of the President to the contrary notwithstanding.?)?"
        + r"(, as amended| \(Amended\))?"
        + r"\.? (Passed|Failed|Agreed to|Rejected)?"  # hr1625-115 has a stray period here
        + " ?(by voice vote|without objection|by (the Yeas and Nays?|Yea-Nay Vote|recorded vote)"
        + r"(:? \(2/3 required\))?: (\d+ ?- ?\d+(, \d+ Present)? [ \)]*)?\((Roll no\.|Record Vote No:) \d+\))",
        line, re.I)
    if m is not None:
        motion, is_override, as_amended, pass_fail, how = m.group(1), m.group(2), m.group(3), m.group(4), m.group(5)

        # NOTE(review): group 4 is optional, so pass_fail can be None here and
        # the third re.search below would raise TypeError -- confirm whether
        # such lines occur before changing the classification.
        if re.search(r"Passed House|House Agreed to", motion, re.I):
            pass_fail = 'pass'
        elif re.search("(ayes|yeas) had prevailed", line, re.I):
            pass_fail = 'pass'
        elif re.search(r"Pass|Agreed", pass_fail, re.I):
            pass_fail = 'pass'
        else:
            pass_fail = 'fail'

        if "Two-thirds of the Members present" in motion:
            is_override = True

        if is_override:
            vote_type = "override"
        elif re.search(r"(agree (with an amendment )?to|concur in) the Senate amendment", line, re.I):
            vote_type = "pingpong"
        elif re.search("conference report", line, re.I):
            vote_type = "conference"
        elif bill_type[0] == "h":
            vote_type = "vote"
        else:
            vote_type = "vote2"

        roll = None
        m = re.search(r"\((Roll no\.|Record Vote No:) (\d+)\)", how, re.I)
        if m is not None:
            how = "roll"  # normalize the ugly how
            roll = m.group(2)

        suspension = None
        if roll and "On motion to suspend the rules" in motion:
            suspension = True

        # alternate form of as amended, e.g. hr3979-113
        if "the House agree with an amendment" in motion:
            as_amended = True

        action["type"] = "vote"
        action["vote_type"] = vote_type
        action["how"] = how
        action['where'] = "h"
        action['result'] = pass_fail
        if roll:
            action["roll"] = roll
        action["suspension"] = suspension

        # correct upstream data error
        if bill_id == "s2012-114" and "Roll no. 250" in line:
            as_amended = True
        if bill_id == "s2943-114" and "On passage Passed without objection" in line:
            as_amended = True

        # get the new status of the bill after this vote
        new_status = new_status_after_vote(vote_type, pass_fail == "pass", "h", bill_type, suspension, as_amended, title, prev_status)
        if new_status:
            status = new_status

    # Passed House, not necessarily by an actual vote (think "deem")
    m = re.search(r"Passed House pursuant to|House agreed to Senate amendment (with amendment )?pursuant to", line, re.I)
    if m is not None:
        vote_type = "vote" if (bill_type[0] == "h") else "vote2"
        if "agreed to Senate amendment" in line:
            vote_type = "pingpong"
        pass_fail = "pass"
        as_amended = bool(m.group(1))

        action["type"] = "vote"
        action["vote_type"] = vote_type
        action["how"] = "by special rule"
        action["where"] = "h"
        action["result"] = pass_fail

        # It's always pursuant to another bill, and a bill number is given in the action line, which we parse out
        # into the bill_ids field of the action. It's also represented
        # structurally in the links->link elements of the original XML which we just put in "links".

        # get the new status of the bill after this vote
        new_status = new_status_after_vote(vote_type, pass_fail == "pass", "h", bill_type, False, as_amended, title, prev_status)

        if new_status:
            status = new_status

    # House motions to table adversely dispose of a pending matter, if agreed to. An agreed-to "motion to table the measure",
    # which is very infrequent, kills the legislation. If not agreed to, nothing changes. So this regex only captures
    # agreed-to motions to table.
    m = re.search(
        "On motion to table the measure Agreed to"
        + " ?(by voice vote|without objection|by (the Yeas and Nays|Yea-Nay Vote|recorded vote)"
        + r": (\d+ - \d+(, \d+ Present)? [ \)]*)?\((Roll no\.|Record Vote No:) \d+\))",
        line, re.I)
    if m is not None:
        how = m.group(1)
        pass_fail = 'fail'

        # In order to classify this as resulting in the same thing as regular failed vote on passage, new_status_after_vote
        # needs to know if this was a vote in the originating chamber or not.
        if prev_status == "INTRODUCED" or bill_id.startswith("hres"):
            vote_type = "vote"
        elif False:
            # Placeholder branch kept from the original: never taken.
            vote_type = "vote2"
        else:
            raise Exception("Need to classify %s as being in the originating chamber or not." % prev_status)

        roll = None
        m = re.search(r"\((Roll no\.|Record Vote No:) (\d+)\)", how, re.I)
        if m is not None:
            how = "roll"  # normalize the ugly how
            roll = m.group(2)

        action["type"] = "vote"
        action["vote_type"] = vote_type
        action["how"] = how
        action['where'] = "h"
        action['result'] = pass_fail
        if roll:
            action["roll"] = roll

        # get the new status of the bill after this vote
        new_status = new_status_after_vote(vote_type, pass_fail == "pass", "h", bill_type, False, False, title, prev_status)

        if new_status:
            status = new_status

    # A Senate Vote
    # (There are some annoying weird cases of double spaces which are taken care of
    # at the end.)
    m = re.search(
        "("
        + "|".join([
            "Passed Senate",
            "Failed of passage in Senate",
            "Disagreed to in Senate",
            "Resolution agreed to in Senate",
            "Senate (?:agreed to|concurred in) (?:the )?(?:conference report|House amendment(?: to the Senate amendments?| to the House amendments?)*)",
            "Senate receded from its amendment and concurred",  # hr1-115
            r"Cloture \S*\s?on the motion to proceed .*?not invoked in Senate",
            r"Cloture(?: motion)? on the motion to proceed to the (?:bill|measure) invoked in Senate",
            "Cloture invoked in Senate",
            "Cloture on (?:the motion to (?:proceed to |concur in )(?:the House amendment (?:to the Senate amendment )?to )?)(?:the bill|H.R. .*) (?:not )?invoked in Senate",
            "(?:Introduced|Received|Submitted) in the Senate, (?:read twice, |considered, |read the third time, )+and (?:passed|agreed to)",
        ])
        + ")"
        + "(,?.*,?) "
        + r"(without objection|by Unanimous Consent|by Voice Vote|(?:by )?Yea-Nay( Vote)?\. \d+\s*-\s*\d+\. Record Vote (No|Number): \d+)",
        # Collapse double spaces to single ones; as written in SOURCE the
        # replace swapped a space for a space and did nothing.
        line.replace("  ", " "), re.I)
    if m is not None:
        motion, extra, how = m.group(1), m.group(2), m.group(3)
        roll = None

        # put disagreed check first, cause "agreed" is contained inside it
        if re.search("disagreed|not invoked", motion, re.I):
            pass_fail = "fail"
        elif re.search("passed|agreed|concurred|invoked", motion, re.I):
            pass_fail = "pass"
        else:
            pass_fail = "fail"

        voteaction_type = "vote"
        if re.search("over veto", extra, re.I):
            vote_type = "override"
        elif re.search("conference report", motion, re.I):
            vote_type = "conference"
        elif re.search("cloture", motion, re.I):
            vote_type = "cloture"
            voteaction_type = "vote-aux"  # because it is not a vote on passage
        elif re.search("Senate agreed to (the )?House amendment|Senate concurred in (the )?House amendment", motion, re.I):
            vote_type = "pingpong"
        elif bill_type[0] == "s":
            vote_type = "vote"
        else:
            vote_type = "vote2"

        m = re.search(r"Record Vote (No|Number): (\d+)", how, re.I)
        if m is not None:
            roll = m.group(2)
            how = "roll"

        as_amended = False
        if re.search(r"with amendments|with an amendment", extra, re.I):
            as_amended = True

        action["type"] = voteaction_type
        action["vote_type"] = vote_type
        action["how"] = how
        action["result"] = pass_fail
        action["where"] = "s"
        if roll:
            action["roll"] = roll

        # get the new status of the bill after this vote
        new_status = new_status_after_vote(vote_type, pass_fail == "pass", "s", bill_type, False, as_amended, title, prev_status)

        if new_status:
            status = new_status

    # OLD-STYLE VOTES (93rd Congress-ish)

    m = re.search(r"Measure passed (House|Senate)(, amended(?: \(.*?\)|, with an amendment to the title)?)?(?:,? in lieu[^,]*)?(?:, roll call #(\d+) \(\d+-\d+\))?", line, re.I)
    if m is not None:
        chamber = m.group(1)[0].lower()  # 'h' or 's'
        as_amended = m.group(2)
        roll_num = m.group(3)
        # NOTE(review): the next line is treated as disabled (commented-out) code -- confirm.
        # GovTrack legacy scraper missed these: if chamber == 's' and (as_amended or roll_num or "lieu" in line): return action, status
        pass_fail = "pass"
        vote_type = "vote" if bill_type[0] == chamber else "vote2"
        action["type"] = "vote"
        action["vote_type"] = vote_type
        action["how"] = "(method not recorded)" if not roll_num else "roll"
        if roll_num:
            action["roll"] = roll_num
        action["result"] = pass_fail
        action["where"] = chamber
        new_status = new_status_after_vote(vote_type, pass_fail == "pass", chamber, bill_type, False, as_amended, title, prev_status)
        if new_status:
            status = new_status

    m = re.search(r"(House|Senate) agreed to (?:House|Senate) amendments?( with an amendment)?( under Suspension of the Rules)?(?:, roll call #(\d+) \(\d+-\d+\))?\.", line, re.I)
    if m is not None:
        chamber = m.group(1)[0].lower()  # 'h' or 's'
        as_amended = m.group(2)
        suspension = m.group(3)
        roll_num = m.group(4)
        # NOTE(review): the next line is treated as disabled (commented-out) code;
        # it references an undefined name ``rull_num`` and would raise NameError
        # if re-enabled as is -- confirm.
        # GovTrack legacy scraper missed these: if (chamber == 'h' and not roll_num) or (chamber == 's' and rull_num): return action, status # REMOVE ME
        pass_fail = "pass"
        vote_type = "pingpong"
        action["type"] = "vote"
        action["vote_type"] = vote_type
        action["how"] = "(method not recorded)" if not roll_num else "roll"
        if roll_num:
            action["roll"] = roll_num
        action["result"] = pass_fail
        action["where"] = chamber
        action["suspension"] = (suspension is not None)
        new_status = new_status_after_vote(vote_type, pass_fail == "pass", chamber, bill_type, False, as_amended, title, prev_status)
        if new_status:
            status = new_status

    # PSUDO-REPORTING (because GovTrack did this, but should be changed)

    # TODO: Make a new status for this as pre-reported.
    m = re.search(r"Placed on (the )?([\w ]+) Calendar( under ([\w ]+))?[,\.] Calendar No\. (\d+)\.|Committee Agreed to Seek Consideration Under Suspension of the Rules|Ordered to be Reported", line, re.I)
    if m is not None:
        # TODO: This makes no sense.
        if prev_status in ("INTRODUCED", "REFERRED"):
            status = "REPORTED"

        action["type"] = "calendar"

        # TODO: Useless. But good for GovTrack compatibility.
        if m.group(2):  # not 'Ordered to be Reported'
            action["calendar"] = m.group(2)
            action["under"] = m.group(4)
            action["number"] = m.group(5)

    # COMMITTEE ACTIONS

    # reported
    m = re.search(r"Committee on (.*)\. Reported by", line, re.I)
    if m is not None:
        action["type"] = "reported"
        action["committee"] = m.group(1)
        if prev_status in ("INTRODUCED", "REFERRED"):
            status = "REPORTED"

    m = re.search(r"Reported to Senate from the (.*?)( \(without written report\))?\.", line, re.I)
    if m is not None:  # 93rd Congress
        action["type"] = "reported"
        action["committee"] = m.group(1)
        if prev_status in ("INTRODUCED", "REFERRED"):
            status = "REPORTED"

    # hearings held by a committee
    m = re.search(r"(Committee on .*?)\. Hearings held", line, re.I)
    if m is not None:
        action["committee"] = m.group(1)
        action["type"] = "hearings"

    m = re.search(r"Committee on (.*)\. Discharged (by Unanimous Consent)?", line, re.I)
    if m is not None:
        action["committee"] = m.group(1)
        action["type"] = "discharged"
        if prev_status in ("INTRODUCED", "REFERRED"):
            status = "REPORTED"

    m = re.search("Cleared for White House|Presented to President", line, re.I)
    if m is not None:
        action["type"] = "topresident"

    m = re.search("Signed by President", line, re.I)
    if m is not None:
        action["type"] = "signed"
        status = "ENACTED:SIGNED"

    m = re.search("Pocket Vetoed by President", line, re.I)
    if m is not None:
        action["type"] = "vetoed"
        action["pocket"] = "1"
        status = "VETOED:POCKET"

    # need to put this in an else, or this regex will match the pocket veto and override it
    else:
        m = re.search("Vetoed by President", line, re.I)
        if m is not None:
            action["type"] = "vetoed"
            status = "PROV_KILL:VETO"

    m = re.search("Sent to Archivist of the United States unsigned", line, re.I)
    if m is not None:
        status = "ENACTED:TENDAYRULE"

    m = re.search(r"^(?:Became )?(Public|Private) Law(?: No:)? ([\d\-]+)\.", line, re.I)
    if m is not None:
        action["law"] = m.group(1).lower()
        pieces = m.group(2).split("-")
        action["congress"] = pieces[0]
        action["number"] = pieces[1]
        action["type"] = "enacted"
        if prev_status in ("ENACTED:SIGNED", "ENACTED:VETO_OVERRIDE", "ENACTED:TENDAYRULE"):
            pass  # this is a final administrative step
        elif prev_status == "PROV_KILL:VETO" or prev_status.startswith("VETOED:"):
            # somehow missed the override steps
            status = "ENACTED:VETO_OVERRIDE"
        elif bill_id in ("s2641-93", "hr1589-94", "s2527-100", "hr1677-101", "hr2978-101", "hr2126-104", "s1322-104"):
            status = "ENACTED:TENDAYRULE"
        else:
            raise Exception("Missing Signed by President action? If this is a case of the 10-day rule, hard code the bill id %s here." % bill_id)

    # Check for referral type
    m = re.search(r"Referred to (?:the )?(House|Senate)?\s?(?:Committee|Subcommittee)?", line, re.I)
    if m is not None:
        action["type"] = "referral"
        if prev_status == "INTRODUCED":
            status = "REFERRED"

    # sweep the action line for bill IDs of related bills
    # (a list, not filter(): on Python 3 filter() returns an iterator that is
    # always truthy and has no len())
    bill_ids = [b for b in utils.extract_bills(line, congress) if b != bill_id]
    if bill_ids:
        action['bill_ids'] = bill_ids

    return action, status
def fetch_amendment(amendment_id, options):
    """Download one amendment's information page, parse it and write it out.

    Returns a dict with ``ok`` and ``saved`` flags, plus a ``reason`` whenever
    nothing was saved (download failure, ``download_only``, or a page without
    an "Amends:" marker).

    Raises:
        Exception: when neither an amended bill nor an amended treaty is
            found, or when no introduction date is found.
    """
    def _not_saved(ok, reason):
        # Early-exit result; key order matches the literal form.
        return {'saved': False, 'ok': ok, 'reason': reason}

    logging.info("\n[%s] Fetching..." % amendment_id)

    page = utils.download(
        amendment_url_for(amendment_id),
        amendment_cache_for(amendment_id, "information.html"),
        options)

    if not page:
        return _not_saved(False, "failed to download")
    if options.get("download_only", False):
        return _not_saved(True, "requested download only")
    if "Amends:" not in page:
        return _not_saved(True, "orphaned amendment")

    amendment_type, number, congress = utils.split_bill_id(amendment_id)

    actions = actions_for(page, amendment_id, is_amendment=True)
    if actions is None:
        actions = []
    parse_amendment_actions(actions)

    chamber = amendment_type[0]

    # good set of tests for each situation:
    # samdt712-113 - amendment to bill
    # samdt112-113 - amendment to amendment on bill
    # samdt4904-111 - amendment to treaty
    # samdt4922-111 - amendment to amendment to treaty
    bill_target = amends_bill_for(page)  # almost always present
    treaty_target = amends_treaty_for(page)  # present if bill is missing
    amendment_target = amends_amendment_for(page)  # sometimes present

    if not (bill_target or treaty_target):
        raise Exception("Choked finding out what bill or treaty the amendment amends.")

    amdt = {
        'amendment_id': amendment_id,
        'amendment_type': amendment_type,
        'chamber': chamber,
        'number': int(number),
        'congress': congress,

        'amends_bill': bill_target,
        'amends_treaty': treaty_target,
        'amends_amendment': amendment_target,

        'sponsor': sponsor_for(page),

        'description': amendment_simple_text_for(page, "description"),
        'purpose': amendment_simple_text_for(page, "purpose"),

        'actions': actions,

        'updated_at': datetime.datetime.fromtimestamp(time.time()),
    }

    # NOTE(review): SOURCE arrived with its indentation collapsed; proposed_at
    # is placed in the Senate branch -- confirm.
    introduction_label = {'h': 'offered', 's': 'submitted'}.get(chamber)
    if introduction_label is not None:
        amdt['introduced_at'] = offered_at_for(page, introduction_label)
    if chamber == 's':
        amdt['proposed_at'] = offered_at_for(page, 'proposed')

    if not amdt.get('introduced_at', None):
        raise Exception("Couldn't find a reliable introduction date for amendment.")

    # needs to come *after* the setting of introduced_at
    amdt['status'], amdt['status_at'] = amendment_status_for(amdt)

    # house_number is only looked up for House amendments, and only after the
    # 100th Congress (A___-style numbers are present starting with the 101st).
    if amdt['chamber'] == 'h' and int(amdt['congress']) > 100:
        amdt['house_number'] = house_number_for(page)

    output_amendment(amdt, options)

    return {'ok': True, 'saved': True}
def billstatus_url_for(bill_id):
    """Return the BILLSTATUS XML URL for ``bill_id`` under ``govinfo.BULKDATA_BASE_URL``."""
    bill_type, bill_number, congress = utils.split_bill_id(bill_id)
    base_url = govinfo.BULKDATA_BASE_URL
    relative_path = 'BILLSTATUS/{0}/{1}/BILLSTATUS-{0}{1}{2}.xml'.format(congress, bill_type, bill_number)
    return base_url + relative_path
def amendment_url_for(amendment_id):
    """Build the THOMAS bdquery URL for an amendment id.

    The type code is the first entry of ``utils.thomas_types`` for the
    amendment type; congress and number are converted to integers before
    formatting.
    """
    parts = utils.split_bill_id(amendment_id)
    amendment_type, number, congress = parts
    type_code = utils.thomas_types[amendment_type][0]
    congress_num = int(congress)
    amendment_num = int(number)
    template = "http://thomas.loc.gov/cgi-bin/bdquery/z?d%03d:%s%s:"
    return template % (congress_num, type_code, amendment_num)
def amdt_url_for(amdt_id):
    """Return the THOMAS list-style bdquery URL for ``amdt_id``.

    The congress appears twice in the URL, zero-padded to three digits both
    times; the type code is the first entry of ``utils.thomas_types``.
    """
    kind, raw_number, raw_congress = utils.split_bill_id(amdt_id)
    type_code = utils.thomas_types[kind][0]
    congress_num = int(raw_congress)
    amdt_num = int(raw_number)
    template = "http://thomas.loc.gov/cgi-bin/bdquery/D?d%03d:%d:./list/bss/d%03d%s.lst::"
    return template % (congress_num, amdt_num, congress_num, type_code)
def amendment_cache_for(amendment_id, file):
    """Return the relative cache path of ``file`` for the given amendment."""
    kind, number, congress = utils.split_bill_id(amendment_id)
    path_parts = (congress, kind, kind, number, file)
    return "%s/amendments/%s/%s%s/%s" % path_parts
def billstatus_url_for(bill_id):
    """Return the BILLSTATUS XML URL for ``bill_id`` under ``fdsys.BULKDATA_BASE_URL``."""
    parts = utils.split_bill_id(bill_id)
    bill_type, bill_number, congress = parts
    prefix = fdsys.BULKDATA_BASE_URL
    suffix = 'BILLSTATUS/{0}/{1}/BILLSTATUS-{0}{1}{2}.xml'.format(congress, bill_type, bill_number)
    return prefix + suffix
def fetch_amendment(amendment_id, options):
    """Fetch a single amendment page, build its record and write it out.

    Returns ``{'ok': True, 'saved': True}`` on success. If the download
    fails, ``download_only`` is set, or the page has no "Amends:" marker, a
    dict with ``saved`` False and a ``reason`` is returned instead.

    Raises:
        Exception: when the page names neither a bill nor a treaty, or when
            no introduction date can be found.
    """
    logging.info("\n[%s] Fetching..." % amendment_id)

    source_url = amendment_url_for(amendment_id)
    cache_path = amendment_cache_for(amendment_id, "information.html")
    html = utils.download(source_url, cache_path, options)

    if not html:
        return {'saved': False, 'ok': False, 'reason': "failed to download"}

    if options.get("download_only", False):
        return {'saved': False, 'ok': True, 'reason': "requested download only"}

    if "Amends:" not in html:
        return {'saved': False, 'ok': True, 'reason': "orphaned amendment"}

    amendment_type, number, congress = utils.split_bill_id(amendment_id)

    found_actions = actions_for(html, amendment_id, is_amendment=True)
    actions = found_actions if found_actions is not None else []
    parse_amendment_actions(actions)

    chamber = amendment_type[0]

    # good set of tests for each situation:
    # samdt712-113 - amendment to bill
    # samdt112-113 - amendment to amendment on bill
    # samdt4904-111 - amendment to treaty
    # samdt4922-111 - amendment to amendment to treaty
    amends_bill = amends_bill_for(html)  # almost always present
    amends_treaty = amends_treaty_for(html)  # present if bill is missing
    amends_amendment = amends_amendment_for(html)  # sometimes present

    if not amends_bill:
        if not amends_treaty:
            raise Exception("Choked finding out what bill or treaty the amendment amends.")

    # Built step by step, in the same order as the original record.
    amdt = {}
    amdt['amendment_id'] = amendment_id
    amdt['amendment_type'] = amendment_type
    amdt['chamber'] = chamber
    amdt['number'] = int(number)
    amdt['congress'] = congress

    amdt['amends_bill'] = amends_bill
    amdt['amends_treaty'] = amends_treaty
    amdt['amends_amendment'] = amends_amendment

    amdt['sponsor'] = sponsor_for(html)

    for text_field in ("description", "purpose"):
        amdt[text_field] = amendment_simple_text_for(html, text_field)

    amdt['actions'] = actions
    amdt['updated_at'] = datetime.datetime.fromtimestamp(time.time())

    # NOTE(review): SOURCE arrived with its indentation collapsed; proposed_at
    # is placed in the Senate branch -- confirm.
    if chamber == 's':
        amdt['introduced_at'] = offered_at_for(html, 'submitted')
        amdt['proposed_at'] = offered_at_for(html, 'proposed')
    elif chamber == 'h':
        amdt['introduced_at'] = offered_at_for(html, 'offered')

    if not amdt.get('introduced_at', None):
        raise Exception("Couldn't find a reliable introduction date for amendment.")

    # needs to come *after* the setting of introduced_at
    status_pair = amendment_status_for(amdt)
    amdt['status'], amdt['status_at'] = status_pair

    # Only House amendments get a house_number, and only after the 100th
    # Congress (A___-style numbers are present starting with the 101st).
    is_house = amdt['chamber'] == 'h'
    if is_house and int(amdt['congress']) > 100:
        amdt['house_number'] = house_number_for(html)

    output_amendment(amdt, options)

    return {'ok': True, 'saved': True}
def output_for_amdt(amendment_id, format):
    """Return the output path of an amendment's data file in the given format.

    The path has the shape
    <data_dir>/<congress>/amendments/<type>/<type><number>/data.<format>.
    """
    amendment_type, number, congress = utils.split_bill_id(amendment_id)

    # Name the two composite path pieces before assembling the full path.
    folder = "%s%s" % (amendment_type, number)
    filename = "data.%s" % format

    return "%s/%s/amendments/%s/%s/%s" % (
        utils.data_dir(), congress, amendment_type, folder, filename)
def create_govtrack_xml(bill, options):
    """Serialize a parsed bill dict as a GovTrack-style <bill> XML document.

    bill: dict holding the bill's parsed fields (congress, bill_type, number,
        status, titles, sponsor, cosponsors, actions, committees,
        related_bills, subjects, amendments, committee_reports, ...).
    options: dict of run options; only the "govtrack" flag is read here.

    Returns the pretty-printed serialization produced by etree.tostring.

    The order of statements matters: it determines the order of elements and
    attributes in the emitted XML.
    """
    govtrack_type_codes = {'hr': 'h', 's': 's', 'hres': 'hr', 'sres': 'sr', 'hjres': 'hj', 'sjres': 'sj', 'hconres': 'hc', 'sconres': 'sc'}

    # Action types that get an element named after themselves; every other
    # action type is emitted as a generic <action>.
    special_action_tags = ("vote", "vote-aux", "calendar", "topresident", "signed", "enacted", "vetoed")

    root = etree.Element("bill")
    root.set("session", bill['congress'])
    root.set("type", govtrack_type_codes[bill['bill_type']])
    root.set("number", bill['number'])
    root.set("updated", utils.format_datetime(bill['updated_at']))

    def make_node(parent, tag, text, **attrs):
        # Thin wrapper around utils.make_node. When the "govtrack" option is
        # set, falsy attribute values are dropped and bioguide IDs are
        # translated to GovTrack person IDs.
        if options.get("govtrack", False):
            # Rewrite bioguide_id attributes as just id with GovTrack person IDs.
            attrs2 = {}
            for k, v in attrs.items():
                if v:
                    if k == "bioguide_id":
                        # remap "bioguide_id" attributes to govtrack "id"
                        k = "id"
                        v = str(utils.translate_legislator_id('bioguide', v, 'govtrack'))
                    attrs2[k] = v
            attrs = attrs2

        return utils.make_node(parent, tag, text, **attrs)

    # for American Memory Century of Lawmaking bills...
    for source in bill.get("sources", []):
        n = make_node(root, "source", "")
        # sorted() keeps the attribute order deterministic
        for k, v in sorted(source.items()):
            if k == "source":
                n.text = v
            elif k == "source_url":
                n.set("url", v)
            else:
                n.set(k, unicode(v))
    if "original_bill_number" in bill:
        make_node(root, "bill-number", bill["original_bill_number"])

    make_node(root, "state", bill['status'], datetime=bill['status_at'])
    old_status = make_node(root, "status", None)
    make_node(old_status, "introduced" if bill['status'] in ("INTRODUCED", "REFERRED") else "unknown", None, datetime=bill['status_at'])  # dummy for the sake of comparison

    make_node(root, "introduced", None, datetime=bill['introduced_at'])
    titles = make_node(root, "titles", None)
    for title in bill['titles']:
        n = make_node(titles, "title", title['title'])
        n.set("type", title['type'])
        if title['as']:
            n.set("as", title['as'])
        if title['is_for_portion']:
            n.set("partial", "1")

    if bill['sponsor']:
        # TODO: Sponsored by committee?
        make_node(root, "sponsor", None, bioguide_id=bill['sponsor']['bioguide_id'])
    else:
        make_node(root, "sponsor", None)

    cosponsors = make_node(root, "cosponsors", None)
    for cosp in bill['cosponsors']:
        n = make_node(cosponsors, "cosponsor", None, bioguide_id=cosp["bioguide_id"])
        if cosp["sponsored_at"]:
            n.set("joined", cosp["sponsored_at"])
        if cosp["withdrawn_at"]:
            n.set("withdrawn", cosp["withdrawn_at"])

    actions = make_node(root, "actions", None)
    for action in bill['actions']:
        a = make_node(actions,
                      action['type'] if action['type'] in special_action_tags else "action",
                      None,
                      datetime=action['acted_at'])
        if action.get("status"):
            a.set("state", action["status"])
        if action['type'] in ('vote', 'vote-aux'):
            a.clear()  # re-insert date between some of these attributes
            a.set("how", action["how"])
            a.set("type", action["vote_type"])
            # identity test: a roll value of 0 would still be written out
            if action.get("roll") is not None:
                a.set("roll", action["roll"])
            a.set("datetime", utils.format_datetime(action['acted_at']))
            a.set("where", action["where"])
            a.set("result", action["result"])
            if action.get("suspension"):
                a.set("suspension", "1")
            # clear() above removed the state attribute, so set it again
            if action.get("status"):
                a.set("state", action["status"])
        if action['type'] == 'calendar' and "calendar" in action:
            a.set("calendar", action["calendar"])
            if action["under"]:
                a.set("under", action["under"])
            if action["number"]:
                a.set("number", action["number"])
        if action['type'] == 'enacted':
            a.clear()  # re-insert date between some of these attributes
            a.set("number", "%s-%s" % (bill['congress'], action["number"]))
            a.set("type", action["law"])
            a.set("datetime", utils.format_datetime(action['acted_at']))
            # clear() above removed the state attribute, so set it again
            if action.get("status"):
                a.set("state", action["status"])
        if action['type'] == 'vetoed':
            if action.get("pocket"):
                a.set("pocket", "1")
        if action.get('text'):
            make_node(a, "text", action['text'])
        if action.get('in_committee'):
            make_node(a, "committee", None, name=action['in_committee'])
        for cr in action['references']:
            make_node(a, "reference", None, ref=cr['reference'], label=cr['type'])

    committees = make_node(root, "committees", None)
    for cmt in bill['committees']:
        # code is committee_id, with subcommittee_id appended when present
        make_node(committees, "committee", None,
                  code=(cmt["committee_id"] + cmt["subcommittee_id"]) if cmt.get("subcommittee_id", None) else cmt["committee_id"],
                  name=cmt["committee"],
                  subcommittee=cmt.get("subcommittee").replace("Subcommittee on ", "") if cmt.get("subcommittee") else "",
                  activity=", ".join(c.title() for c in cmt["activity"]))

    relatedbills = make_node(root, "relatedbills", None)
    for rb in bill['related_bills']:
        # only related items of type "bill" are emitted
        if rb['type'] == "bill":
            rb_bill_type, rb_number, rb_congress = utils.split_bill_id(rb['bill_id'])
            make_node(relatedbills, "bill", None,
                      session=rb_congress,
                      type=govtrack_type_codes[rb_bill_type],
                      number=rb_number,
                      relation="unknown" if rb['reason'] == "related" else rb['reason'])

    subjects = make_node(root, "subjects", None)
    # the top term goes first and is not repeated in the loop below
    if bill['subjects_top_term']:
        make_node(subjects, "term", None, name=bill['subjects_top_term'])
    for s in bill['subjects']:
        if s != bill['subjects_top_term']:
            make_node(subjects, "term", None, name=s)

    amendments = make_node(root, "amendments", None)
    for amd in bill['amendments']:
        make_node(amendments, "amendment", None, number=amd["chamber"] + str(amd["number"]))

    if bill.get('summary'):
        make_node(root, "summary", bill['summary']['text'], date=bill['summary']['date'], status=bill['summary']['as'])

    committee_reports = make_node(root, "committee-reports", None)
    for report in bill['committee_reports']:
        make_node(committee_reports, "report", report)

    return etree.tostring(root, pretty_print=True)
def amdt_cache_for(amdt_id, file):
    """Return the relative cache path of `file` for the given amendment.

    The path has the shape <congress>/amendments/<type>/<type><number>/<file>.
    """
    amdt_type, number, congress = utils.split_bill_id(amdt_id)

    # Per-amendment directory name, e.g. type and number run together.
    folder = "%s%s" % (amdt_type, number)

    return "%s/amendments/%s/%s/%s" % (congress, amdt_type, folder, file)