Example #1
0
def run(options):
    """Work out which bill versions to process and run write_bill_catoxml on them.

    Options read here:
      bill_version_id -- when set, it is split into bill type, number,
                         congress and version code.
      bill_id         -- consulted only when bill_version_id is not set.
      congress        -- consulted only when neither id is set; defaults to
                         utils.current_congress().
      force           -- forwarded to bill_version_ids_for (default False).

    Returns None; nothing is processed when bill_version_ids_for yields
    nothing.
    """
    # Defaults for the "whole congress" case; overwritten below when an id
    # narrows the selection.
    bill_type = bill_number = version_code = None

    requested_version = options.get("bill_version_id", None)
    if requested_version:
        (bill_type, bill_number, congress,
         version_code) = utils.split_bill_version_id(requested_version)
        # The rebuilt bill id is not used further in this function.
        bill_id = utils.build_bill_id(bill_type, bill_number, congress)
    else:
        bill_id = options.get("bill_id", None)
        if bill_id:
            bill_type, bill_number, congress = utils.split_bill_id(bill_id)
        else:
            congress = options.get("congress", utils.current_congress())

    overwrite = options.get("force", False)
    targets = bill_version_ids_for(
        congress, bill_type, bill_number, version_code, overwrite)

    if not targets:
        return None

    saved_versions = utils.process_set(targets, write_bill_catoxml, options)
Example #2
0
def run(options):
    """Select the bill versions to fetch and hand them to fetch_version.

    Options read here:
      bill_version_id -- if set, exactly this one version is fetched.
      bill_id         -- if set, the congress is taken from it (it is checked
                         before bill_version_id for that purpose).
      congress        -- used only when neither id is given; defaults to
                         utils.current_congress().
      limit           -- if set, only the first int(limit) versions are kept.

    Returns None. When no versions can be determined, an error is logged and
    nothing is processed.
    """
    bill_id = options.get('bill_id', None)
    bill_version_id = options.get('bill_version_id', None)

    # using a specific bill or version overrides the congress flag/default
    if bill_id:
        _, _, congress = utils.split_bill_id(bill_id)
    elif bill_version_id:
        _, _, congress, _ = utils.split_bill_version_id(bill_version_id)
    else:
        congress = options.get('congress', utils.current_congress())

    if bill_version_id:
        to_fetch = [bill_version_id]
    else:
        to_fetch = bill_version_ids_for(congress, options)
        if not to_fetch:
            logging.error("Error figuring out which bills to download, aborting.")
            return None

    limit = options.get('limit', None)
    if limit:
        to_fetch = to_fetch[:int(limit)]

    # logging.warn is a deprecated alias of logging.warning; the arguments are
    # passed separately so formatting only happens if the record is emitted.
    logging.warning("Going to fetch %i bill versions for congress #%s",
                    len(to_fetch), congress)

    utils.process_set(to_fetch, fetch_version, options)
Example #3
0
def fetch_version(bill_version_id, options):
    """Look up one bill version's document info and save it as JSON.

    The issue date and URLs come from fdsys.document_info_for; together with
    the version code and the id itself they are serialized and written to the
    path given by output_for_bill_version.

    Returns {'ok': True, 'saved': True}.
    """
    logging.info("\n[%s] Fetching..." % bill_version_id)

    # Only the version code is needed from the parsed id.
    _type, _number, _congress, version_code = utils.split_bill_version_id(
        bill_version_id)

    issued_on, urls = fdsys.document_info_for(
        filename_for(bill_version_id),
        version_cache_for(bill_version_id, "mods.xml"),
        options)

    record = dict(
        issued_on=issued_on,
        urls=urls,
        version_code=version_code,
        bill_version_id=bill_version_id,
    )
    serialized = json.dumps(
        record, sort_keys=True, indent=2, default=utils.format_datetime)
    utils.write(serialized, output_for_bill_version(bill_version_id))

    return {'ok': True, 'saved': True}
Example #4
0
def extract_bill_version_metadata(package_name, text_path):
    """Read mods.xml under text_path and write the bill version's data.json.

    The bill version id is derived from package_name via
    get_bill_id_for_package. Each mods:location/mods:url element is filed
    under "html", "pdf", "xml" or "unknown" according to its displayLabel
    attribute; a later URL of the same kind replaces an earlier one. The
    issue date is the string value of mods:dateIssued.
    """
    bill_version_id = get_bill_id_for_package(package_name)
    _type, _number, _congress, version_code = utils.split_bill_version_id(bill_version_id)

    namespaces = {"mods": "http://www.loc.gov/mods/v3"}
    mods_doc = etree.parse(os.path.join(text_path, "mods.xml"))

    # Checked in this order; the first marker found in the label wins.
    label_markers = (("HTML", "html"), ("PDF", "pdf"), ("XML", "xml"))

    urls = {}
    for url_node in mods_doc.xpath("//mods:location/mods:url", namespaces=namespaces):
        label = url_node.attrib['displayLabel']
        kind = next((name for marker, name in label_markers if marker in label), "unknown")
        urls[kind] = url_node.text

    record = {
        'bill_version_id': bill_version_id,
        'version_code': version_code,
        'urls': urls,
        'issued_on': mods_doc.xpath("string(//mods:dateIssued)", namespaces=namespaces),
    }

    serialized = json.dumps(record, sort_keys=True, indent=2, default=utils.format_datetime)
    utils.write(serialized, output_for_bill_version(bill_version_id))
Example #5
0
def fetch_version(bill_version_id, options):
  """Look up one bill version's document info and save it as JSON.

  The issue date and URLs come from fdsys.document_info_for; together with
  the version code and the id itself they are serialized and written to the
  path given by output_for_bill_version.

  Returns {'ok': True, 'saved': True}.
  """
  logging.info("\n[%s] Fetching..." % bill_version_id)

  # Only the version code is needed from the parsed id.
  _type, _number, _congress, version_code = utils.split_bill_version_id(bill_version_id)

  issued_on, urls = fdsys.document_info_for(
    filename_for(bill_version_id),
    version_cache_for(bill_version_id, "mods.xml"),
    options
  )

  record = dict(
    issued_on=issued_on,
    urls=urls,
    version_code=version_code,
    bill_version_id=bill_version_id,
  )
  serialized = json.dumps(record, sort_keys=True, indent=2, default=utils.format_datetime)
  utils.write(serialized, output_for_bill_version(bill_version_id))

  return {'ok': True, 'saved': True}
Example #6
0
def run(options):
  """Select the bill versions to fetch and hand them to fetch_version.

  Options read here:
    bill_version_id -- if set, exactly this one version is fetched.
    bill_id         -- if set, the congress is taken from it (it is checked
                       before bill_version_id for that purpose).
    congress        -- used only when neither id is given; defaults to
                       utils.current_congress().
    limit           -- if set, only the first int(limit) versions are kept.

  Returns None. When no versions can be determined, an error is logged and
  nothing is processed.
  """
  bill_id = options.get('bill_id', None)
  bill_version_id = options.get('bill_version_id', None)

  # using a specific bill or version overrides the congress flag/default
  if bill_id:
    _, _, congress = utils.split_bill_id(bill_id)
  elif bill_version_id:
    _, _, congress, _ = utils.split_bill_version_id(bill_version_id)
  else:
    congress = options.get('congress', utils.current_congress())

  if bill_version_id:
    to_fetch = [bill_version_id]
  else:
    to_fetch = bill_version_ids_for(congress, options)
    if not to_fetch:
      logging.error("Error figuring out which bills to download, aborting.")
      return None

  limit = options.get('limit', None)
  if limit:
    to_fetch = to_fetch[:int(limit)]

  # logging.warn is a deprecated alias of logging.warning; the arguments are
  # passed separately so formatting only happens if the record is emitted.
  logging.warning("Going to fetch %i bill versions for congress #%s", len(to_fetch), congress)

  utils.process_set(to_fetch, fetch_version, options)
Example #7
0
def write_bill_version_metadata(bill_version_id):
  """Parse a bill version's mods.xml and write its data.json.

  The MODS file is read from document_filename_for(bill_version_id,
  "mods.xml"). Each mods:location/mods:url element is filed under "html",
  "pdf", "xml" or "unknown" according to its displayLabel attribute; a later
  URL of the same kind replaces an earlier one. The issue date is the string
  value of mods:dateIssued.

  Returns {'ok': True, 'saved': True}.
  """
  _type, _number, _congress, version_code = utils.split_bill_version_id(bill_version_id)

  namespaces = {"mods": "http://www.loc.gov/mods/v3"}
  mods_doc = etree.parse(document_filename_for(bill_version_id, "mods.xml"))

  # Checked in this order; the first marker found in the label wins.
  label_markers = (("HTML", "html"), ("PDF", "pdf"), ("XML", "xml"))

  urls = {}
  for url_node in mods_doc.xpath("//mods:location/mods:url", namespaces=namespaces):
    label = url_node.attrib['displayLabel']
    kind = next((name for marker, name in label_markers if marker in label), "unknown")
    urls[kind] = url_node.text

  record = {
    'bill_version_id': bill_version_id,
    'version_code': version_code,
    'urls': urls,
    'issued_on': mods_doc.xpath("string(//mods:dateIssued)", namespaces=namespaces),
  }

  serialized = json.dumps(record, sort_keys=True, indent=2, default=utils.format_datetime)
  utils.write(serialized, output_for_bill_version(bill_version_id))

  return {'ok': True, 'saved': True}
Example #8
0
def fetch_version(bill_version_id, options):
    """Download the MODS XML for one bill version, then write its metadata.

    The file is fetched from mods_url_for(bill_version_id) into
    document_filename_for(bill_version_id, "mods.xml"), with the caller's
    options merged with binary=True and to_cache=False.

    Returns whatever write_bill_version_metadata returns.
    """
    logging.info("\n[%s] Fetching..." % bill_version_id)

    # The unpacked parts are not used below; the call is retained, and it
    # raises if the id does not unpack into exactly four parts.
    _type, _number, _congress, _code = utils.split_bill_version_id(bill_version_id)

    source_url = mods_url_for(bill_version_id)
    destination = document_filename_for(bill_version_id, "mods.xml")
    download_options = utils.merge(options, {'binary': True, 'to_cache': False})
    utils.download(source_url, destination, download_options)

    return write_bill_version_metadata(bill_version_id)
Example #9
0
def fetch_version(bill_version_id, options):
  """Download the MODS XML for one bill version, then write its metadata.

  The file is fetched from mods_url_for(bill_version_id) into
  document_filename_for(bill_version_id, "mods.xml"), with the caller's
  options merged with binary=True and to_cache=False.

  Returns whatever write_bill_version_metadata returns.
  """
  logging.info("\n[%s] Fetching..." % bill_version_id)

  # The unpacked parts are not used below; the call is retained, and it
  # raises if the id does not unpack into exactly four parts.
  _type, _number, _congress, _code = utils.split_bill_version_id(bill_version_id)

  source_url = mods_url_for(bill_version_id)
  destination = document_filename_for(bill_version_id, "mods.xml")
  download_options = utils.merge(options, {'binary': True, 'to_cache': False})
  utils.download(source_url, destination, download_options)

  return write_bill_version_metadata(bill_version_id)
Example #10
0
def run(options):
  """Pick the bill versions to process and run write_bill_catoxml on each.

  If options has bill_version_id, only that version is processed. Otherwise
  the list comes from bill_version_ids_for(congress, options), where congress
  is taken from bill_id when given, else from the congress option (default
  utils.current_congress()).

  Returns None; logs an error and stops when no versions are found.
  """
  requested_bill = options.get('bill_id', None)
  requested_version = options.get('bill_version_id', None)

  # A specific bill or version takes priority over the congress flag/default.
  if requested_bill:
    _type, _number, congress = utils.split_bill_id(requested_bill)
  elif requested_version:
    _type, _number, congress, _code = utils.split_bill_version_id(requested_version)
  else:
    congress = options.get('congress', utils.current_congress())

  # A single requested version is a non-empty list, so the emptiness check
  # below can only trigger for the looked-up list.
  targets = [requested_version] if requested_version else bill_version_ids_for(congress, options)
  if not targets:
    logging.error("Error figuring out which bills to download, aborting.")
    return None

  saved_versions = utils.process_set(targets, write_bill_catoxml, options)
Example #11
0
def run(options):
    """Pick the bill versions to process and run write_bill_catoxml on each.

    If options has bill_version_id, only that version is processed. Otherwise
    the list comes from bill_version_ids_for(congress, options), where
    congress is taken from bill_id when given, else from the congress option
    (default utils.current_congress()).

    Returns None; logs an error and stops when no versions are found.
    """
    requested_bill = options.get('bill_id', None)
    requested_version = options.get('bill_version_id', None)

    # A specific bill or version takes priority over the congress
    # flag/default.
    if requested_bill:
        _type, _number, congress = utils.split_bill_id(requested_bill)
    elif requested_version:
        _type, _number, congress, _code = utils.split_bill_version_id(
            requested_version)
    else:
        congress = options.get('congress', utils.current_congress())

    # A single requested version is a non-empty list, so the emptiness check
    # below can only trigger for the looked-up list.
    if requested_version:
        targets = [requested_version]
    else:
        targets = bill_version_ids_for(congress, options)
    if not targets:
        logging.error(
            "Error figuring out which bills to download, aborting.")
        return None

    saved_versions = utils.process_set(targets, write_bill_catoxml, options)
Example #12
0
def run(options):
    """Work out which bill versions to process and run write_bill_catoxml on them.

    Options read here:
      bill_version_id -- when set, it is split into bill type, number,
                         congress and version code.
      bill_id         -- consulted only when bill_version_id is not set.
      congress        -- consulted only when neither id is set; defaults to
                         utils.current_congress().
      force           -- forwarded to bill_version_ids_for (default False).

    Returns None; nothing is processed when bill_version_ids_for yields
    nothing.
    """
    # Defaults for the "whole congress" case; overwritten below when an id
    # narrows the selection.
    bill_type = bill_number = version_code = None

    requested_version = options.get("bill_version_id", None)
    if requested_version:
        bill_type, bill_number, congress, version_code = utils.split_bill_version_id(requested_version)
        # The rebuilt bill id is not used further in this function.
        bill_id = utils.build_bill_id(bill_type, bill_number, congress)
    else:
        bill_id = options.get("bill_id", None)
        if bill_id:
            bill_type, bill_number, congress = utils.split_bill_id(bill_id)
        else:
            congress = options.get("congress", utils.current_congress())

    overwrite = options.get("force", False)
    targets = bill_version_ids_for(congress, bill_type, bill_number, version_code, overwrite)

    if not targets:
        return None

    saved_versions = utils.process_set(targets, write_bill_catoxml, options)
Example #13
0
def deepbills_url_for(bill_version_id):
  """Return the deepbills.cato.org API URL for the given bill version id.

  The id is split into bill type, number, congress and version code, which
  fill the billtype, billnumber, congress and billversion query parameters.
  """
  kind, num, cong, code = utils.split_bill_version_id(bill_version_id)
  query_values = (cong, kind, num, code)
  return "http://deepbills.cato.org/api/1/bill?congress=%s&billtype=%s&billnumber=%s&billversion=%s" % query_values
Example #14
0
def output_for_bill_version(bill_version_id):
    """Return the data.json path for the given bill version id.

    Layout, rooted at utils.data_dir():
    <congress>/bills/<type>/<type><number>/text-versions/<version>/data.json
    """
    kind, num, cong, code = utils.split_bill_version_id(bill_version_id)
    path_parts = (utils.data_dir(), cong, kind, kind, num, code)
    return "%s/%s/bills/%s/%s%s/text-versions/%s/data.json" % path_parts
Example #15
0
def filename_for(bill_version_id):
  """Return "BILLS-" followed by congress, bill type, number and version code.

  The four parts are obtained by splitting bill_version_id and are joined
  with no separators.
  """
  kind, num, cong, code = utils.split_bill_version_id(bill_version_id)
  name_parts = (cong, kind, num, code)
  return "BILLS-%s%s%s%s" % name_parts
Example #16
0
def document_filename_for(bill_version_id, filename):
    """Return the path of `filename` inside a bill version's directory.

    Layout, rooted at utils.data_dir():
    <congress>/bills/<type>/<type><number>/text-versions/<version>/<filename>
    """
    kind, num, cong, code = utils.split_bill_version_id(bill_version_id)
    path_parts = (utils.data_dir(), cong, kind, kind, num, code, filename)
    return "%s/%s/bills/%s/%s%s/text-versions/%s/%s" % path_parts
Example #17
0
def mods_url_for(bill_version_id):
    """Return the www.gpo.gov fdsys mods.xml URL for the given bill version id.

    The package segment is "BILLS-" followed by congress, bill type, number
    and version code with no separators.
    """
    kind, num, cong, code = utils.split_bill_version_id(bill_version_id)
    package_parts = (cong, kind, num, code)
    return "http://www.gpo.gov/fdsys/pkg/BILLS-%s%s%s%s/mods.xml" % package_parts
Example #18
0
def document_filename_for(bill_version_id, filename):
  """Return the path of `filename` inside a bill version's directory.

  Layout, rooted at utils.data_dir():
  <congress>/bills/<type>/<type><number>/text-versions/<version>/<filename>
  """
  kind, num, cong, code = utils.split_bill_version_id(bill_version_id)
  path_parts = (utils.data_dir(), cong, kind, kind, num, code, filename)
  return "%s/%s/bills/%s/%s%s/text-versions/%s/%s" % path_parts
Example #19
0
def output_for_bill_version(bill_version_id):
  """Return the data.json path for the given bill version id.

  Layout, rooted at utils.data_dir():
  <congress>/bills/<type>/<type><number>/text-versions/<version>/data.json
  """
  kind, num, cong, code = utils.split_bill_version_id(bill_version_id)
  path_parts = (utils.data_dir(), cong, kind, kind, num, code)
  return "%s/%s/bills/%s/%s%s/text-versions/%s/data.json" % path_parts
Example #20
0
def deepbills_url_for(bill_version_id):
    """Return the deepbills.cato.org API URL for the given bill version id.

    The id is split into bill type, number, congress and version code, which
    fill the billtype, billnumber, congress and billversion query parameters.
    """
    kind, num, cong, code = utils.split_bill_version_id(bill_version_id)
    query_values = (cong, kind, num, code)
    return "http://deepbills.cato.org/api/1/bill?congress=%s&billtype=%s&billnumber=%s&billversion=%s" % query_values
Example #21
0
def mods_url_for(bill_version_id):
  """Return the www.gpo.gov fdsys mods.xml URL for the given bill version id.

  The package segment is "BILLS-" followed by congress, bill type, number
  and version code with no separators.
  """
  kind, num, cong, code = utils.split_bill_version_id(bill_version_id)
  package_parts = (cong, kind, num, code)
  return "http://www.gpo.gov/fdsys/pkg/BILLS-%s%s%s%s/mods.xml" % package_parts
Example #22
0
def filename_for(bill_version_id):
    """Return "BILLS-" followed by congress, bill type, number and version code.

    The four parts are obtained by splitting bill_version_id and are joined
    with no separators.
    """
    kind, num, cong, code = utils.split_bill_version_id(bill_version_id)
    name_parts = (cong, kind, num, code)
    return "BILLS-%s%s%s%s" % name_parts