def run(options):
    """Select bill versions from ``options`` and process them with ``write_bill_catoxml``.

    Selection precedence:

    * ``bill_version_id`` -- split into bill type, number, congress and
      version code.
    * ``bill_id`` -- split into bill type, number and congress; the version
      code is None.
    * neither -- bill type and number are None and the congress comes from
      ``options["congress"]``, defaulting to ``utils.current_congress()``.

    ``options["force"]`` (default False) is forwarded to
    ``bill_version_ids_for``. The function returns None in every case; when
    no version ids come back, nothing is processed.
    """
    version_id = options.get("bill_version_id", None)

    if not version_id:
        # No explicit version: fall back to a bill id, then to a whole congress.
        version_code = None
        selected_bill = options.get("bill_id", None)
        if not selected_bill:
            bill_type = None
            bill_number = None
            congress = options.get("congress", utils.current_congress())
        else:
            bill_type, bill_number, congress = utils.split_bill_id(selected_bill)
    else:
        bill_type, bill_number, congress, version_code = utils.split_bill_version_id(version_id)
        # The rebuilt bill id is not read again below; the call is retained as-is.
        selected_bill = utils.build_bill_id(bill_type, bill_number, congress)

    candidates = bill_version_ids_for(
        congress, bill_type, bill_number, version_code, options.get("force", False))

    if candidates:
        utils.process_set(candidates, write_bill_catoxml, options)
    return None
def run(options):
    """Decide which bill versions to fetch and run ``fetch_version`` on each.

    Keys read from ``options``:

    * ``bill_id`` -- if set, the congress is taken from this id.
    * ``bill_version_id`` -- if set, it is the only version fetched; it also
      supplies the congress when ``bill_id`` is absent.
    * ``congress`` -- used only when neither id is given; defaults to
      ``utils.current_congress()``.
    * ``limit`` -- if truthy, the work list is truncated to ``int(limit)``
      entries.

    Returns None. When no version ids can be determined, an error is logged
    and nothing is fetched.
    """
    bill_id = options.get('bill_id', None)
    bill_version_id = options.get('bill_version_id', None)

    # using a specific bill or version overrides the congress flag/default
    if bill_id:
        _, _, congress = utils.split_bill_id(bill_id)
    elif bill_version_id:
        _, _, congress, _ = utils.split_bill_version_id(bill_version_id)
    else:
        congress = options.get('congress', utils.current_congress())

    if bill_version_id:
        to_fetch = [bill_version_id]
    else:
        to_fetch = bill_version_ids_for(congress, options)

    if not to_fetch:
        logging.error("Error figuring out which bills to download, aborting.")
        return None

    limit = options.get('limit', None)
    if limit:
        to_fetch = to_fetch[:int(limit)]

    # logging.warn is a deprecated alias (removed in Python 3.13); use
    # logging.warning and let the logging module do the interpolation.
    logging.warning("Going to fetch %i bill versions for congress #%s", len(to_fetch), congress)

    utils.process_set(to_fetch, fetch_version, options)
    return None
def fetch_version(bill_version_id, options):
    """Look up document info for one bill version and save it as JSON.

    The issue date and URLs come from ``fdsys.document_info_for``, called
    with the name from ``filename_for`` and the "mods.xml" cache location
    from ``version_cache_for``. They are combined with the version code and
    the id itself, serialized with sorted keys, and written to the path
    given by ``output_for_bill_version``.

    Returns ``{'ok': True, 'saved': True}``.
    """
    logging.info("\n[%s] Fetching..." % bill_version_id)

    # Only the version code is needed from the split id.
    _type, _number, _congress, version_code = utils.split_bill_version_id(
        bill_version_id)

    issued_on, urls = fdsys.document_info_for(
        filename_for(bill_version_id),
        version_cache_for(bill_version_id, "mods.xml"),
        options,
    )

    record = {
        'bill_version_id': bill_version_id,
        'version_code': version_code,
        'issued_on': issued_on,
        'urls': urls,
    }

    serialized = json.dumps(
        record, sort_keys=True, indent=2, default=utils.format_datetime)
    utils.write(serialized, output_for_bill_version(bill_version_id))

    return {'ok': True, 'saved': True}
def extract_bill_version_metadata(package_name, text_path):
    """Build the metadata record for one bill-text package and write it as JSON.

    Args:
        package_name: passed to ``get_bill_id_for_package`` to obtain the
            bill version id.
        text_path: directory containing the package's ``mods.xml``.

    The record holds the bill version id, its version code, a ``urls`` dict
    keyed by "html" / "pdf" / "xml" / "unknown" (chosen from each MODS
    ``<url>`` element's ``displayLabel``), and ``issued_on`` (the string
    value of ``mods:dateIssued``). It is written to the path given by
    ``output_for_bill_version``. Returns None.
    """
    bill_version_id = get_bill_id_for_package(package_name)
    _, _, _, version_code = utils.split_bill_version_id(bill_version_id)

    mods_ns = {"mods": "http://www.loc.gov/mods/v3"}
    doc = etree.parse(os.path.join(text_path, "mods.xml"))

    urls = {}
    for location in doc.xpath("//mods:location/mods:url", namespaces=mods_ns):
        # A <url> lacking displayLabel is filed under "unknown" instead of
        # aborting the whole record with a KeyError.
        label = location.get('displayLabel', '')

        # Named url_format so the builtin format() is not shadowed.
        if "HTML" in label:
            url_format = "html"
        elif "PDF" in label:
            url_format = "pdf"
        elif "XML" in label:
            url_format = "xml"
        else:
            url_format = "unknown"

        # A later URL of the same kind replaces an earlier one.
        urls[url_format] = location.text

    bill_version = {
        'bill_version_id': bill_version_id,
        'version_code': version_code,
        'urls': urls,
        'issued_on': doc.xpath("string(//mods:dateIssued)", namespaces=mods_ns),
    }

    utils.write(
        json.dumps(bill_version, sort_keys=True, indent=2, default=utils.format_datetime),
        output_for_bill_version(bill_version_id)
    )
def fetch_version(bill_version_id, options):
    """Gather one bill version's issue date and URLs and store them as data.json.

    ``fdsys.document_info_for`` supplies the ``(issued_on, urls)`` pair. The
    saved JSON object also carries ``version_code`` and ``bill_version_id``.
    The destination path comes from ``output_for_bill_version``.

    Always returns ``{'ok': True, 'saved': True}`` when no call raises.
    """
    logging.info("\n[%s] Fetching..." % bill_version_id)

    pieces = utils.split_bill_version_id(bill_version_id)
    # Unpack all four parts as before; only the last one is used here.
    _bill_type, _number, _congress, version_code = pieces

    package_name = filename_for(bill_version_id)
    cache_path = version_cache_for(bill_version_id, "mods.xml")
    issued_on, urls = fdsys.document_info_for(package_name, cache_path, options)

    payload = json.dumps(
        {
            'issued_on': issued_on,
            'urls': urls,
            'version_code': version_code,
            'bill_version_id': bill_version_id,
        },
        sort_keys=True,
        indent=2,
        default=utils.format_datetime,
    )
    destination = output_for_bill_version(bill_version_id)
    utils.write(payload, destination)

    return {'ok': True, 'saved': True}
def write_bill_version_metadata(bill_version_id):
    """Parse a bill version's mods.xml and write its metadata record as JSON.

    The MODS file is read from ``document_filename_for(bill_version_id,
    "mods.xml")``. The record holds the bill version id, its version code, a
    ``urls`` dict keyed by "html" / "pdf" / "xml" / "unknown" (chosen from
    each ``<url>`` element's ``displayLabel``), and ``issued_on`` (the string
    value of ``mods:dateIssued``). It is written to the path given by
    ``output_for_bill_version``.

    Returns ``{'ok': True, 'saved': True}``.
    """
    _, _, _, version_code = utils.split_bill_version_id(bill_version_id)

    mods_ns = {"mods": "http://www.loc.gov/mods/v3"}
    doc = etree.parse(document_filename_for(bill_version_id, "mods.xml"))

    # Checked in this order; the first marker found in the label wins.
    label_markers = (("HTML", "html"), ("PDF", "pdf"), ("XML", "xml"))

    urls = {}
    for location in doc.xpath("//mods:location/mods:url", namespaces=mods_ns):
        # A <url> lacking displayLabel is filed under "unknown" instead of
        # aborting the whole record with a KeyError.
        label = location.get('displayLabel', '')

        # Named url_format so the builtin format() is not shadowed.
        url_format = "unknown"
        for marker, kind in label_markers:
            if marker in label:
                url_format = kind
                break

        # A later URL of the same kind replaces an earlier one.
        urls[url_format] = location.text

    bill_version = {
        'bill_version_id': bill_version_id,
        'version_code': version_code,
        'urls': urls,
        'issued_on': doc.xpath("string(//mods:dateIssued)", namespaces=mods_ns),
    }

    utils.write(
        json.dumps(bill_version, sort_keys=True, indent=2, default=utils.format_datetime),
        output_for_bill_version(bill_version_id)
    )

    return {'ok': True, 'saved': True}
def fetch_version(bill_version_id, options):
    """Download the MODS file for one bill version, then write its metadata.

    The file is fetched from ``mods_url_for(bill_version_id)`` and saved to
    ``document_filename_for(bill_version_id, "mods.xml")``, with ``options``
    merged with ``binary=True`` and ``to_cache=False``.

    Returns whatever ``write_bill_version_metadata`` returns.
    """
    # Download MODS etc.
    logging.info("\n[%s] Fetching..." % bill_version_id)

    # None of the split parts are read below; the call and unpacking are
    # retained unchanged.
    _type, _number, _congress, _version = utils.split_bill_version_id(bill_version_id)

    source_url = mods_url_for(bill_version_id)
    local_path = document_filename_for(bill_version_id, "mods.xml")
    download_options = utils.merge(options, {'binary': True, 'to_cache': False})
    utils.download(source_url, local_path, download_options)

    return write_bill_version_metadata(bill_version_id)
def run(options):
    """Work out which bill versions to process and run ``write_bill_catoxml`` on each.

    ``options`` may carry ``bill_id``, ``bill_version_id`` and ``congress``.
    A ``bill_version_id`` makes the work list just that one id. Otherwise
    the list comes from ``bill_version_ids_for(congress, options)``. The
    congress is taken from ``bill_id``, else from ``bill_version_id``, else
    from ``options["congress"]`` (default ``utils.current_congress()``).

    Returns None. An error is logged when the work list is empty.
    """
    requested_bill = options.get('bill_id', None)
    requested_version = options.get('bill_version_id', None)

    # using a specific bill or version overrides the congress flag/default
    if requested_bill:
        _type, _number, congress = utils.split_bill_id(requested_bill)
    elif requested_version:
        _type, _number, congress, _code = utils.split_bill_version_id(requested_version)
    else:
        congress = options.get('congress', utils.current_congress())

    targets = [requested_version] if requested_version else bill_version_ids_for(congress, options)

    if targets:
        utils.process_set(targets, write_bill_catoxml, options)
        return None

    logging.error("Error figuring out which bills to download, aborting.")
    return None
def run(options):
    """Pick the bill versions named by ``options`` and process them with ``write_bill_catoxml``.

    * With ``bill_version_id`` set, only that version is processed.
    * Otherwise ``bill_version_ids_for(congress, options)`` supplies the
      list. ``congress`` comes from ``bill_id`` when given, else from
      ``options["congress"]`` (default ``utils.current_congress()``).

    Logs an error and stops if the list is empty. Always returns None.
    """
    bill = options.get('bill_id', None)
    version = options.get('bill_version_id', None)

    # a specific bill or version takes priority over the congress flag/default
    if bill:
        _bill_type, _bill_number, congress = utils.split_bill_id(bill)
    elif version:
        _bill_type, _bill_number, congress, _version_code = utils.split_bill_version_id(
            version)
    else:
        fallback_congress = utils.current_congress()
        congress = options.get('congress', fallback_congress)

    if not version:
        queue = bill_version_ids_for(congress, options)
    else:
        queue = [version]

    if not queue:
        logging.error(
            "Error figuring out which bills to download, aborting.")
        return None

    utils.process_set(queue, write_bill_catoxml, options)
def run(options):
    """Resolve the requested scope from ``options`` and process matching bill versions.

    The scope is a (congress, bill type, bill number, version code) tuple:

    * from ``bill_version_id`` if present (all four parts);
    * else from ``bill_id`` (version code None);
    * else just the congress from ``options["congress"]``, defaulting to
      ``utils.current_congress()``, with the other three None.

    The scope and ``options["force"]`` (default False) go to
    ``bill_version_ids_for``; each id it yields is handed to
    ``write_bill_catoxml`` through ``utils.process_set``. Returns None.
    """
    explicit_version = options.get("bill_version_id", None)

    if explicit_version:
        parts = utils.split_bill_version_id(explicit_version)
        bill_type, bill_number, congress, version_code = parts
        # The rebuilt bill id is not read again below; the call is retained as-is.
        bill_id = utils.build_bill_id(bill_type, bill_number, congress)
    else:
        version_code = None
        bill_id = options.get("bill_id", None)
        if bill_id:
            bill_type, bill_number, congress = utils.split_bill_id(bill_id)
        else:
            bill_type, bill_number = None, None
            default_congress = utils.current_congress()
            congress = options.get("congress", default_congress)

    refetch = options.get("force", False)
    pending = bill_version_ids_for(congress, bill_type, bill_number, version_code, refetch)

    if not pending:
        return None

    utils.process_set(pending, write_bill_catoxml, options)
def deepbills_url_for(bill_version_id):
    """Return the deepbills.cato.org API URL for the given bill version id.

    The id is split with ``utils.split_bill_version_id`` and its congress,
    bill type, number and version code fill the query string, in that order.
    """
    bill_type, number, congress, version_code = utils.split_bill_version_id(bill_version_id)
    query_values = (congress, bill_type, number, version_code)
    template = "http://deepbills.cato.org/api/1/bill?congress=%s&billtype=%s&billnumber=%s&billversion=%s"
    return template % query_values
def output_for_bill_version(bill_version_id):
    """Return the ``data.json`` path for a bill version under ``utils.data_dir()``.

    Layout:
    ``<data_dir>/<congress>/bills/<type>/<type><number>/text-versions/<version_code>/data.json``.
    """
    bill_type, number, congress, version_code = utils.split_bill_version_id(
        bill_version_id)
    root = utils.data_dir()
    layout = "%s/%s/bills/%s/%s%s/text-versions/%s/data.json"
    return layout % (root, congress, bill_type, bill_type, number, version_code)
def filename_for(bill_version_id):
    """Return the ``BILLS-<congress><type><number><version_code>`` name for a bill version id."""
    bill_type, number, congress, version_code = utils.split_bill_version_id(bill_version_id)
    name_parts = (congress, bill_type, number, version_code)
    return "BILLS-%s%s%s%s" % name_parts
def document_filename_for(bill_version_id, filename):
    """Return the path of ``filename`` inside a bill version's text-versions directory.

    Layout:
    ``<data_dir>/<congress>/bills/<type>/<type><number>/text-versions/<version_code>/<filename>``,
    where ``<data_dir>`` is ``utils.data_dir()``.
    """
    bill_type, number, congress, version_code = utils.split_bill_version_id(
        bill_version_id)
    base_dir = utils.data_dir()
    pattern = "%s/%s/bills/%s/%s%s/text-versions/%s/%s"
    return pattern % (
        base_dir, congress, bill_type, bill_type, number, version_code, filename)
def mods_url_for(bill_version_id):
    """Return the gpo.gov FDsys URL of the ``mods.xml`` file for a bill version id.

    The package segment of the URL is
    ``BILLS-<congress><type><number><version_code>``.
    """
    bill_type, number, congress, version_code = utils.split_bill_version_id(bill_version_id)
    package_fields = (congress, bill_type, number, version_code)
    return "http://www.gpo.gov/fdsys/pkg/BILLS-%s%s%s%s/mods.xml" % package_fields
def document_filename_for(bill_version_id, filename):
    """Build the on-disk path for a named document belonging to a bill version.

    The result is
    ``<utils.data_dir()>/<congress>/bills/<type>/<type><number>/text-versions/<version_code>/<filename>``.
    """
    bill_type, number, congress, version_code = utils.split_bill_version_id(bill_version_id)
    fields = (
        utils.data_dir(),
        congress,
        bill_type,
        bill_type,
        number,
        version_code,
        filename,
    )
    return "%s/%s/bills/%s/%s%s/text-versions/%s/%s" % fields
def output_for_bill_version(bill_version_id):
    """Build the path of the ``data.json`` file for a bill version.

    The result is
    ``<utils.data_dir()>/<congress>/bills/<type>/<type><number>/text-versions/<version_code>/data.json``.
    """
    bill_type, number, congress, version_code = utils.split_bill_version_id(bill_version_id)
    fields = (
        utils.data_dir(),
        congress,
        bill_type,
        bill_type,
        number,
        version_code,
    )
    return "%s/%s/bills/%s/%s%s/text-versions/%s/data.json" % fields
def deepbills_url_for(bill_version_id):
    """Build the deepbills.cato.org API request URL for one bill version.

    The query parameters ``congress``, ``billtype``, ``billnumber`` and
    ``billversion`` are filled from the parts returned by
    ``utils.split_bill_version_id``.
    """
    bill_type, number, congress, version_code = utils.split_bill_version_id(
        bill_version_id)
    url_pattern = "http://deepbills.cato.org/api/1/bill?congress=%s&billtype=%s&billnumber=%s&billversion=%s"
    return url_pattern % (congress, bill_type, number, version_code)
def filename_for(bill_version_id):
    """Build the ``BILLS-...`` name for a bill version id.

    The parts from ``utils.split_bill_version_id`` are concatenated as
    congress, bill type, number and version code after the ``BILLS-`` prefix.
    """
    bill_type, number, congress, version_code = utils.split_bill_version_id(
        bill_version_id)
    pattern = "BILLS-%s%s%s%s"
    return pattern % (congress, bill_type, number, version_code)