def run(options):
    """Work out which roll call votes to fetch and pass them to the fetcher.

    Options read here:
      vote_id    -- fetch just this vote; congress and session year are
                    taken from the ID via utils.split_vote_id.
      congress   -- congress to scan (default: utils.current_congress()).
      session    -- session year to scan (default: current calendar year).
      fast       -- only affects which message is logged when no votes
                    are found.
      limit      -- when scanning, keep at most the first int(limit) IDs.
      pages_only -- stop once the list of vote IDs has been built.

    Always returns None.
    """
    vote_id = options.get('vote_id', None)

    if vote_id:
        # Only congress and session year are needed, for the log line below.
        _, _, congress, session_year = utils.split_vote_id(vote_id)
        to_fetch = [vote_id]
    else:
        congress = options.get('congress', utils.current_congress())
        session_year = options.get('session', str(datetime.datetime.now().year))

        # Guard each lookup with `or []` so that a None result from one
        # chamber cannot make the concatenation raise TypeError.
        house_ids = vote_ids_for_house(congress, session_year, options) or []
        senate_ids = vote_ids_for_senate(congress, session_year, options) or []
        to_fetch = house_ids + senate_ids

        if not to_fetch:
            if not options.get("fast", False):
                logging.error("Error figuring out which votes to download, aborting.")
            else:
                # In fast mode an empty result is expected, so this is a
                # warning rather than an error.
                logging.warning("No new or recent votes.")
            return None

        limit = options.get('limit', None)
        if limit:
            to_fetch = to_fetch[:int(limit)]

    if options.get('pages_only', False):
        return None

    # logging.warning replaces the deprecated logging.warn alias.
    logging.warning(
        "Going to fetch %i votes from congress #%s session %s",
        len(to_fetch), congress, session_year,
    )

    utils.process_set(to_fetch, vote_info.fetch_vote, options)
def fetch_vote(vote_id, options):
    """Download the XML for one roll call vote, parse it and output it.

    The vote ID is split with utils.split_vote_id. Chamber "h" is fetched
    from clerk.house.gov; any other chamber uses the senate.gov URL.

    Returns a dict with 'ok' and 'saved' flags, plus a 'reason' string
    whenever nothing was saved (download failure, download_only option,
    or a vacated vote).
    """
    logging.info("\n[%s] Fetching..." % vote_id)

    chamber, number, congress, session_year = utils.split_vote_id(vote_id)
    is_house = chamber == "h"

    if is_house:
        url = "http://clerk.house.gov/evs/%s/roll%03d.xml" % (session_year, int(number))
    else:
        # The Senate URL uses the 1-based session number within the
        # congress rather than the calendar year.
        first_year = utils.get_congress_first_year(int(congress))
        session_num = int(session_year) - first_year + 1
        url = "http://www.senate.gov/legislative/LIS/roll_call_votes/vote%d%d/vote_%d_%d_%05d.xml" % (
            int(congress), session_num, int(congress), session_num, int(number))

    # Fetch the vote XML page.
    cache_path = "%s/votes/%s/%s%s/%s%s.xml" % (
        congress, session_year, chamber, number, chamber, number)
    download_options = utils.merge(options, {'binary': True})
    body = utils.download(url, cache_path, download_options)

    if not body:
        return {'saved': False, 'ok': False, 'reason': "failed to download"}

    if options.get("download_only", False):
        return {'saved': False, 'ok': True, 'reason': "requested download only"}

    # NOTE(review): the download is requested with binary=True; if
    # utils.download returns bytes, this str-in-body test would raise
    # TypeError on Python 3 -- confirm against utils.download.
    if "This vote was vacated" in body:
        # Vacated votes: 2011-484, 2012-327, ...
        # Output written by an earlier run may still exist, so remove it.
        stale_paths = [output_for_vote(vote_id, extension) for extension in ("json", "xml")]
        for stale_path in stale_paths:
            if os.path.exists(stale_path):
                os.unlink(stale_path)
        return {'saved': False, 'ok': True, 'reason': "vote was vacated"}

    dom = etree.fromstring(body)

    vote = {
        'vote_id': vote_id,
        'chamber': chamber,
        'congress': int(congress),
        'session': session_year,
        'number': int(number),
        'updated_at': datetime.datetime.fromtimestamp(time.time()),
        'source_url': url,
    }

    # The chamber-specific parsers do the heavy lifting, filling in `vote`.
    if is_house:
        parse_house_vote(dom, vote)
    elif chamber == "s":
        parse_senate_vote(dom, vote)

    # Output and return.
    output_vote(vote, options)

    return {'ok': True, 'saved': True}
def output_for_vote(vote_id, format):
    """Return the output path for a vote's data file in the given format.

    The result has the shape
    "<data dir>/<congress>/votes/<session year>/<chamber><number>/data.<format>",
    where the data dir comes from utils.data_dir() and the remaining
    pieces from utils.split_vote_id(vote_id).
    """
    chamber, number, congress, session_year = utils.split_vote_id(vote_id)

    root = utils.data_dir()
    vote_dir = "%s%s" % (chamber, number)
    filename = "data.%s" % format

    return "%s/%s/votes/%s/%s/%s" % (root, congress, session_year, vote_dir, filename)
def run(options):
    """Work out which roll call votes to fetch and pass them to the fetcher.

    Options read here:
      vote_id    -- fetch just this vote; congress and session year are
                    taken from the ID via utils.split_vote_id.
      congress   -- congress to scan. When given, `session` is required;
                    otherwise utils.current_congress() is used.
      session    -- session year to scan (default, when no congress is
                    given: the current calendar year).
      chamber    -- "house" or "senate" to scan one chamber; any other
                    value (or none) scans both.
      fast       -- only affects which message is logged when no votes
                    are found.
      limit      -- when scanning, keep at most the first int(limit) IDs.
      pages_only -- stop once the list of vote IDs has been built.

    Always returns None.
    """
    vote_id = options.get('vote_id', None)

    if vote_id:
        # Only congress and session year are needed, for the log line below.
        _, _, congress, session_year = utils.split_vote_id(vote_id)
        to_fetch = [vote_id]
    else:
        congress = options.get('congress', None)
        if congress:
            session_year = options.get('session', None)
            if not session_year:
                logging.error("If you provide a --congress, provide a --session year.")
                return None
        else:
            congress = utils.current_congress()
            session_year = options.get('session', str(datetime.datetime.now().year))

        chamber = options.get('chamber', None)

        if chamber == "house":
            to_fetch = vote_ids_for_house(congress, session_year, options)
        elif chamber == "senate":
            to_fetch = vote_ids_for_senate(congress, session_year, options)
        else:
            # Guard each lookup with `or []` so that a None result from one
            # chamber cannot make the concatenation raise TypeError.
            house_ids = vote_ids_for_house(congress, session_year, options) or []
            senate_ids = vote_ids_for_senate(congress, session_year, options) or []
            to_fetch = house_ids + senate_ids

        if not to_fetch:
            if not options.get("fast", False):
                logging.error("Error figuring out which votes to download, aborting.")
            else:
                logging.warning("No new or recent votes.")
            return None

        limit = options.get('limit', None)
        if limit:
            to_fetch = to_fetch[:int(limit)]

    if options.get('pages_only', False):
        return None

    # logging.warning replaces the deprecated logging.warn alias.
    logging.warning(
        "Going to fetch %i votes from congress #%s session %s",
        len(to_fetch), congress, session_year,
    )

    utils.process_set(to_fetch, vote_info.fetch_vote, options)
def run(options):
    """Work out which roll call votes to fetch and pass them to the fetcher.

    Options read here:
      vote_id    -- fetch just this vote; congress and session year are
                    taken from the ID via utils.split_vote_id.
      congress   -- congress to scan. When given, `session` is required;
                    otherwise utils.current_congress() is used.
      session    -- session year to scan (default, when no congress is
                    given: utils.current_legislative_year()).
      chamber    -- "house" or "senate" to scan one chamber; any other
                    value (or none) scans both.
      fast       -- only affects which message is logged when no votes
                    are found.
      limit      -- when scanning, keep at most the first int(limit) IDs.
      pages_only -- stop once the list of vote IDs has been built.

    Always returns None.
    """
    vote_id = options.get('vote_id', None)

    if vote_id:
        # Only congress and session year are needed, for the log line below.
        _, _, congress, session_year = utils.split_vote_id(vote_id)
        to_fetch = [vote_id]
    else:
        congress = options.get('congress', None)
        if congress:
            session_year = options.get('session', None)
            if not session_year:
                logging.error("If you provide a --congress, provide a --session year.")
                return None
        else:
            congress = utils.current_congress()
            session_year = options.get('session', str(utils.current_legislative_year()))

        chamber = options.get('chamber', None)

        if chamber == "house":
            to_fetch = vote_ids_for_house(congress, session_year, options)
        elif chamber == "senate":
            to_fetch = vote_ids_for_senate(congress, session_year, options)
        else:
            # `or []` keeps the concatenation valid when a chamber's lookup
            # returns None.
            house_ids = vote_ids_for_house(congress, session_year, options) or []
            senate_ids = vote_ids_for_senate(congress, session_year, options) or []
            to_fetch = house_ids + senate_ids

        if not to_fetch:
            if not options.get("fast", False):
                logging.error("Error figuring out which votes to download, aborting.")
            else:
                logging.warning("No new or recent votes.")
            return None

        limit = options.get('limit', None)
        if limit:
            to_fetch = to_fetch[:int(limit)]

    if options.get('pages_only', False):
        return None

    # logging.warning replaces the deprecated logging.warn alias.
    logging.warning(
        "Going to fetch %i votes from congress #%s session %s",
        len(to_fetch), congress, session_year,
    )

    utils.process_set(to_fetch, vote_info.fetch_vote, options)
def output_for_vote(vote_id, format):
    """Build the path to a vote's "data.<format>" output file.

    The path is rooted at utils.data_dir() and laid out as
    <congress>/votes/<session year>/<chamber><number>/, using the
    components returned by utils.split_vote_id(vote_id).
    """
    path_template = "%s/%s/votes/%s/%s%s/%s"
    chamber, number, congress, year = utils.split_vote_id(vote_id)
    path_parts = (
        utils.data_dir(),
        congress,
        year,
        chamber,
        number,
        "data.%s" % format,
    )
    return path_template % path_parts