def run(options):
    """Work out which votes to download and hand them to the vote fetcher.

    Args:
        options: Dict of task options. Keys read here:
            "vote_id": fetch just this one vote; congress and session are
                taken from the ID itself.
            "congress" / "session": which congress and session year to list
                votes for. If "congress" is given, "session" is required.
                With neither, the current congress and legislative year are
                used.
            "chamber": "house" or "senate"; any other value lists both.
            "fast": only changes which message is logged here when no votes
                are found (the dict is also passed on to the listing
                helpers).
            "limit": keep only the first N listed vote IDs.
            "pages_only": return after listing, without fetching any vote.

    Returns:
        Always None. The result of ``utils.process_set`` is not returned.
    """
    vote_id = options.get('vote_id', None)

    if vote_id:
        # Chamber and number are encoded in the ID too but are not needed here.
        _chamber, _number, congress, session_year = utils.split_vote_id(vote_id)
        to_fetch = [vote_id]
    else:
        congress = options.get('congress', None)
        if congress:
            session_year = options.get('session', None)
            if not session_year:
                logging.error("If you provide a --congress, provide a --session year.")
                return None
        else:
            congress = utils.current_congress()
            session_year = options.get('session', str(utils.current_legislative_year()))

        chamber = options.get('chamber', None)

        if chamber == "house":
            to_fetch = vote_ids_for_house(congress, session_year, options)
        elif chamber == "senate":
            to_fetch = vote_ids_for_senate(congress, session_year, options)
        else:
            # Either helper may return a falsy value; treat that as "no votes"
            # so the two lists can still be concatenated.
            to_fetch = (
                (vote_ids_for_house(congress, session_year, options) or [])
                + (vote_ids_for_senate(congress, session_year, options) or [])
            )

        if not to_fetch:
            if not options.get("fast", False):
                logging.error("Error figuring out which votes to download, aborting.")
            else:
                logging.warning("No new or recent votes.")
            return None

        limit = options.get('limit', None)
        if limit:
            to_fetch = to_fetch[:int(limit)]

    if options.get('pages_only', False):
        return None

    # logging.warning replaces the deprecated logging.warn alias.
    logging.warning(
        "Going to fetch %i votes from congress #%s session %s",
        len(to_fetch), congress, session_year)

    utils.process_set(to_fetch, vote_info.fetch_vote, options)
def fetch_senate_committee_meetings(committees, options):
    """Scrape the Senate hearing-schedule XML feed into a list of meeting dicts.

    Meetings already present in the saved Senate output file keep their GUID
    (matched on committee, subcommittee and start time); all others get a
    fresh UUID4.

    Args:
        committees: Mapping keyed by committee code. Each value must provide a
            "subcommittees" container that supports ``in`` for subcommittee
            codes.
        options: Task options. Copied before use, so the caller's dict is not
            modified. The copy, with "binary" forced to True, is passed to
            ``utils.download``. "debug" turns on per-meeting printing.

    Returns:
        A list of dicts with the keys "chamber", "congress", "guid",
        "committee", "subcommittee", "occurs_at", "room", "topic" and
        "bill_ids".

    Raises:
        ValueError: If a meeting's date does not match the
            "%d-%b-%Y %I:%M %p" format.
    """
    # Load any existing meetings file so we can recycle any GUIDs.
    existing_meetings = []
    output_file = output_for("senate")
    if os.path.exists(output_file):
        # Use a context manager so the file handle is closed right away.
        with open(output_file) as existing_file:
            existing_meetings = json.load(existing_file)

    options = dict(options)  # clone
    options["binary"] = True
    # options["force"] = True

    # Compiled once here rather than once per meeting.
    # NOTE(review): the pattern is unanchored, so a word ending in "s"
    # followed by a number (e.g. "Rules 12") is picked up as a Senate bill;
    # confirm whether a word boundary is wanted.
    bill_number_re = re.compile(
        r"(hr|s|hconres|sconres|hjres|sjres|hres|sres)\s?(\d+)", re.I)

    meetings = []

    dom = lxml.etree.fromstring(
        utils.download(
            "http://www.senate.gov/general/committee_schedules/hearings.xml",
            "committee_schedule/senate.xml", options))

    debug = options.get("debug", False)

    for node in dom.xpath("meeting"):
        committee_id = str(node.xpath('string(cmte_code)'))
        if committee_id.strip() == "":
            continue  # "No committee hearings scheduled" placeholder
        occurs_at = str(node.xpath('string(date)'))
        room = str(node.xpath('string(room)'))
        topic = str(node.xpath('string(matter)'))

        occurs_at = datetime.datetime.strptime(occurs_at, "%d-%b-%Y %I:%M %p")
        occurs_at_iso = occurs_at.isoformat()
        topic = re.sub(r"\s+", " ", topic).strip()

        # Validate committee code.
        try:
            committee_code, subcommittee_code = re.match(
                r"(\D+)(\d+)$", committee_id).groups()
            if committee_code not in committees:
                raise ValueError(committee_code)
            if subcommittee_code == "00":
                subcommittee_code = None
            if subcommittee_code and subcommittee_code not in committees[
                    committee_code]["subcommittees"]:
                raise ValueError(subcommittee_code)
        except (AttributeError, KeyError, TypeError, ValueError):
            # AttributeError: the ID did not match the pattern (re.match gave
            # None). KeyError/TypeError: the committee record has no usable
            # "subcommittees" entry. ValueError: unknown code, raised above.
            # Unlike a bare except, this lets KeyboardInterrupt/SystemExit
            # through.
            print("Invalid committee code", committee_id)
            continue

        # See if this meeting already exists. If so, take its GUID.
        # Assume meetings are the same if they are for the same
        # committee/subcommittee and at the same time.
        for mtg in existing_meetings:
            if (mtg["committee"] == committee_code
                    and mtg.get("subcommittee", None) == subcommittee_code
                    and mtg["occurs_at"] == occurs_at_iso):
                if debug:
                    print("[%s] Reusing gUID." % mtg["guid"])
                guid = mtg["guid"]
                break
        else:
            # Not found, so create a new ID.
            # TODO: Can we make this a human-readable ID?
            guid = str(uuid.uuid4())

        # Scrape the topic text for mentions of bill numbers.
        congress = utils.congress_from_legislative_year(
            utils.current_legislative_year(occurs_at))
        # Dots are dropped first so that "H.R. 12" reads as "HR 12".
        bills = [
            bill_type.lower() + number + "-" + str(congress)
            for bill_type, number in bill_number_re.findall(topic.replace(".", ""))
        ]

        # Create the meeting event.
        if debug:
            print("[senate][%s][%s] Found meeting in room %s at %s." %
                  (committee_code, subcommittee_code, room, occurs_at_iso))

        meetings.append({
            "chamber": "senate",
            "congress": congress,
            "guid": guid,
            "committee": committee_code,
            "subcommittee": subcommittee_code,
            "occurs_at": occurs_at_iso,
            "room": room,
            "topic": topic,
            "bill_ids": bills,
        })

    print("[senate] Found %i meetings." % len(meetings))
    return meetings
def fetch_senate_committee_meetings(existing_meetings, committees, options):
    """Parse the Senate committee meeting XML feed into a list of meeting dicts.

    To aid users of the data, each meeting is assigned a GUID. A meeting that
    matches an entry of ``existing_meetings`` (same committee, subcommittee
    and start time) reuses that entry's GUID; all others get a fresh UUID4.

    Args:
        existing_meetings: Iterable of previously produced meeting dicts. Each
            needs "committee", "occurs_at" and "guid" keys; "subcommittee" is
            optional.
        committees: Mapping keyed by committee code. Each value must provide a
            "subcommittees" container that supports ``in`` for subcommittee
            codes.
        options: Task options. Copied before use, so the caller's dict is not
            modified. The copy, with "binary" forced to True, is passed to
            ``utils.download``.

    Returns:
        A list of dicts with the keys "chamber", "congress", "guid",
        "committee", "subcommittee", "occurs_at", "room", "topic" and "bills".

    Raises:
        ValueError: If a meeting's date does not match the
            "%d-%b-%Y %I:%M %p" format.
    """
    options = dict(options)  # clone
    options["binary"] = True

    # Compiled once here rather than once per meeting.
    # NOTE(review): the pattern is unanchored, so a word ending in "s"
    # followed by a number (e.g. "Rules 12") is picked up as a Senate bill;
    # confirm whether a word boundary is wanted.
    bill_number_re = re.compile(
        r"(hr|s|hconres|sconres|hjres|sjres|hres|sres)\s?(\d+)", re.I)

    meetings = []

    dom = lxml.etree.fromstring(utils.download(
        "http://www.senate.gov/general/committee_schedules/hearings.xml",
        "committee_schedule/senate.xml",
        options))

    for node in dom.xpath("meeting"):
        committee_id = unicode(node.xpath('string(cmte_code)'))
        if committee_id.strip() == "":
            continue  # "No committee hearings scheduled" placeholder
        occurs_at = unicode(node.xpath('string(date)'))
        room = unicode(node.xpath('string(room)'))
        topic = unicode(node.xpath('string(matter)'))

        occurs_at = datetime.datetime.strptime(occurs_at, "%d-%b-%Y %I:%M %p")
        occurs_at_iso = occurs_at.isoformat()
        topic = re.sub(r"\s+", " ", topic).strip()

        # Validate committee code.
        try:
            committee_code, subcommittee_code = re.match(
                r"(\D+)(\d+)$", committee_id).groups()
            if committee_code not in committees:
                raise ValueError(committee_code)
            if subcommittee_code == "00":
                subcommittee_code = None
            if subcommittee_code and subcommittee_code not in committees[
                    committee_code]["subcommittees"]:
                raise ValueError(subcommittee_code)
        except (AttributeError, KeyError, TypeError, ValueError):
            # AttributeError: the ID did not match the pattern (re.match gave
            # None). KeyError/TypeError: the committee record has no usable
            # "subcommittees" entry. ValueError: unknown code, raised above.
            # Unlike a bare except, this lets KeyboardInterrupt/SystemExit
            # through.
            # Single-argument form prints the same text as the old print
            # statement and also parses under Python 3.
            print("Invalid committee code %s" % committee_id)
            continue

        # See if this meeting already exists. If so, take its GUID.
        # Assume meetings are the same if they are for the same
        # committee/subcommittee and at the same time.
        for mtg in existing_meetings:
            if (mtg["committee"] == committee_code
                    and mtg.get("subcommittee", None) == subcommittee_code
                    and mtg["occurs_at"] == occurs_at_iso):
                guid = mtg["guid"]
                break
        else:
            # Not found, so create a new ID.
            guid = unicode(uuid.uuid4())

        # Scrape the topic text for mentions of bill numbers.
        congress = utils.congress_from_legislative_year(
            utils.current_legislative_year(occurs_at))
        # Dots are dropped first so that "H.R. 12" reads as "HR 12".
        bills = [
            bill_type.lower() + number + "-" + str(congress)
            for bill_type, number in bill_number_re.findall(topic.replace(".", ""))
        ]

        # Create the meeting event.
        meetings.append({
            "chamber": "senate",
            "congress": congress,
            "guid": guid,
            "committee": committee_code,
            "subcommittee": subcommittee_code,
            "occurs_at": occurs_at_iso,
            "room": room,
            "topic": topic,
            "bills": bills,
        })

    return meetings
def fetch_senate_committee_meetings(existing_meetings, committees, options):
    """Parse the Senate committee meeting XML feed into a list of meeting dicts.

    To aid users of the data, each meeting is assigned a GUID. A meeting that
    matches an entry of ``existing_meetings`` (same committee, subcommittee
    and start time) reuses that entry's GUID; all others get a fresh UUID4.

    Args:
        existing_meetings: Iterable of previously produced meeting dicts. Each
            needs "committee", "occurs_at" and "guid" keys; "subcommittee" is
            optional.
        committees: Mapping keyed by committee code. Each value must provide a
            "subcommittees" container that supports ``in`` for subcommittee
            codes.
        options: Task options. Copied before use, so the caller's dict is not
            modified. The copy, with "binary" forced to True, is passed to
            ``utils.download``.

    Returns:
        A list of dicts with the keys "chamber", "congress", "guid",
        "committee", "subcommittee", "occurs_at", "room", "topic" and "bills".

    Raises:
        ValueError: If a meeting's date does not match the
            "%d-%b-%Y %I:%M %p" format.
    """
    options = dict(options)  # clone
    options["binary"] = True

    # Compiled once here rather than once per meeting.
    # NOTE(review): the pattern is unanchored, so a word ending in "s"
    # followed by a number (e.g. "Rules 12") is picked up as a Senate bill;
    # confirm whether a word boundary is wanted.
    bill_number_re = re.compile(
        r"(hr|s|hconres|sconres|hjres|sjres|hres|sres)\s?(\d+)", re.I)

    meetings = []

    dom = lxml.etree.fromstring(
        utils.download(
            "http://www.senate.gov/general/committee_schedules/hearings.xml",
            "committee_schedule/senate.xml", options))

    for node in dom.xpath("meeting"):
        committee_id = unicode(node.xpath('string(cmte_code)'))
        if committee_id.strip() == "":
            continue  # "No committee hearings scheduled" placeholder
        occurs_at = unicode(node.xpath('string(date)'))
        room = unicode(node.xpath('string(room)'))
        topic = unicode(node.xpath('string(matter)'))

        occurs_at = datetime.datetime.strptime(occurs_at, "%d-%b-%Y %I:%M %p")
        occurs_at_iso = occurs_at.isoformat()
        topic = re.sub(r"\s+", " ", topic).strip()

        # Validate committee code.
        try:
            committee_code, subcommittee_code = re.match(
                r"(\D+)(\d+)$", committee_id).groups()
            if committee_code not in committees:
                raise ValueError(committee_code)
            if subcommittee_code == "00":
                subcommittee_code = None
            if subcommittee_code and subcommittee_code not in committees[
                    committee_code]["subcommittees"]:
                raise ValueError(subcommittee_code)
        except (AttributeError, KeyError, TypeError, ValueError):
            # AttributeError: the ID did not match the pattern (re.match gave
            # None). KeyError/TypeError: the committee record has no usable
            # "subcommittees" entry. ValueError: unknown code, raised above.
            # Unlike a bare except, this lets KeyboardInterrupt/SystemExit
            # through.
            # Single-argument form prints the same text as the old print
            # statement and also parses under Python 3.
            print("Invalid committee code %s" % committee_id)
            continue

        # See if this meeting already exists. If so, take its GUID.
        # Assume meetings are the same if they are for the same
        # committee/subcommittee and at the same time.
        for mtg in existing_meetings:
            if (mtg["committee"] == committee_code
                    and mtg.get("subcommittee", None) == subcommittee_code
                    and mtg["occurs_at"] == occurs_at_iso):
                guid = mtg["guid"]
                break
        else:
            # Not found, so create a new ID.
            guid = unicode(uuid.uuid4())

        # Scrape the topic text for mentions of bill numbers.
        congress = utils.congress_from_legislative_year(
            utils.current_legislative_year(occurs_at))
        # Dots are dropped first so that "H.R. 12" reads as "HR 12".
        bills = [
            bill_type.lower() + number + "-" + str(congress)
            for bill_type, number in bill_number_re.findall(topic.replace(".", ""))
        ]

        # Create the meeting event.
        meetings.append({
            "chamber": "senate",
            "congress": congress,
            "guid": guid,
            "committee": committee_code,
            "subcommittee": subcommittee_code,
            "occurs_at": occurs_at_iso,
            "room": room,
            "topic": topic,
            "bills": bills,
        })

    return meetings
def fetch_senate_committee_meetings(committees, options):
    """Scrape the Senate hearing-schedule XML feed into a list of meeting dicts.

    Meetings already present in the saved Senate output file keep their GUID
    (matched on committee, subcommittee and start time); all others get a
    fresh UUID4.

    Args:
        committees: Mapping keyed by committee code. Each value must provide a
            "subcommittees" container that supports ``in`` for subcommittee
            codes.
        options: Task options. Copied before use, so the caller's dict is not
            modified. The copy, with "binary" forced to True, is passed to
            ``utils.download``. "debug" turns on per-meeting printing.

    Returns:
        A list of dicts with the keys "chamber", "congress", "guid",
        "committee", "subcommittee", "occurs_at", "room", "topic" and
        "bill_ids".

    Raises:
        ValueError: If a meeting's date does not match the
            "%d-%b-%Y %I:%M %p" format.
    """
    # Load any existing meetings file so we can recycle any GUIDs.
    existing_meetings = []
    output_file = output_for("senate")
    if os.path.exists(output_file):
        # Use a context manager so the file handle is closed right away.
        with open(output_file) as existing_file:
            existing_meetings = json.load(existing_file)

    options = dict(options)  # clone
    options["binary"] = True
    # options["force"] = True

    # Compiled once here rather than once per meeting.
    # NOTE(review): the pattern is unanchored, so a word ending in "s"
    # followed by a number (e.g. "Rules 12") is picked up as a Senate bill;
    # confirm whether a word boundary is wanted.
    bill_number_re = re.compile(
        r"(hr|s|hconres|sconres|hjres|sjres|hres|sres)\s?(\d+)", re.I)

    meetings = []

    dom = lxml.etree.fromstring(
        utils.download(
            "http://www.senate.gov/general/committee_schedules/hearings.xml",
            "committee_schedule/senate.xml",
            options,
        )
    )

    debug = options.get("debug", False)

    for node in dom.xpath("meeting"):
        committee_id = unicode(node.xpath("string(cmte_code)"))
        if committee_id.strip() == "":
            continue  # "No committee hearings scheduled" placeholder
        occurs_at = unicode(node.xpath("string(date)"))
        room = unicode(node.xpath("string(room)"))
        topic = unicode(node.xpath("string(matter)"))

        occurs_at = datetime.datetime.strptime(occurs_at, "%d-%b-%Y %I:%M %p")
        occurs_at_iso = occurs_at.isoformat()
        topic = re.sub(r"\s+", " ", topic).strip()

        # Validate committee code.
        try:
            committee_code, subcommittee_code = re.match(
                r"(\D+)(\d+)$", committee_id).groups()
            if committee_code not in committees:
                raise ValueError(committee_code)
            if subcommittee_code == "00":
                subcommittee_code = None
            if subcommittee_code and subcommittee_code not in committees[
                    committee_code]["subcommittees"]:
                raise ValueError(subcommittee_code)
        except (AttributeError, KeyError, TypeError, ValueError):
            # AttributeError: the ID did not match the pattern (re.match gave
            # None). KeyError/TypeError: the committee record has no usable
            # "subcommittees" entry. ValueError: unknown code, raised above.
            # Unlike a bare except, this lets KeyboardInterrupt/SystemExit
            # through.
            # Single-argument form prints the same text as the old print
            # statement and also parses under Python 3.
            print("Invalid committee code %s" % committee_id)
            continue

        # See if this meeting already exists. If so, take its GUID.
        # Assume meetings are the same if they are for the same
        # committee/subcommittee and at the same time.
        for mtg in existing_meetings:
            if (mtg["committee"] == committee_code
                    and mtg.get("subcommittee", None) == subcommittee_code
                    and mtg["occurs_at"] == occurs_at_iso):
                if debug:
                    print("[%s] Reusing gUID." % mtg["guid"])
                guid = mtg["guid"]
                break
        else:
            # Not found, so create a new ID.
            # TODO: Can we make this a human-readable ID?
            guid = unicode(uuid.uuid4())

        # Scrape the topic text for mentions of bill numbers.
        congress = utils.congress_from_legislative_year(
            utils.current_legislative_year(occurs_at))
        # Dots are dropped first so that "H.R. 12" reads as "HR 12".
        bills = [
            bill_type.lower() + number + "-" + str(congress)
            for bill_type, number in bill_number_re.findall(topic.replace(".", ""))
        ]

        # Create the meeting event.
        if debug:
            print("[senate][%s][%s] Found meeting in room %s at %s." % (
                committee_code,
                subcommittee_code,
                room,
                occurs_at_iso,
            ))

        meetings.append(
            {
                "chamber": "senate",
                "congress": congress,
                "guid": guid,
                "committee": committee_code,
                "subcommittee": subcommittee_code,
                "occurs_at": occurs_at_iso,
                "room": room,
                "topic": topic,
                "bill_ids": bills,
            }
        )

    print("[senate] Found %i meetings." % len(meetings))
    return meetings