def fetch_nomination(nomination_id, options=None):
    """Download one nomination's information page, then parse and save it.

    Args:
        nomination_id: Nomination identifier; it is broken into type, number
            and congress by ``utils.split_nomination_id``.
        options: Optional dict of run options. Only ``"download_only"`` is
            read here; the dict is also forwarded to the download, parse and
            output helpers.

    Returns:
        A dict with boolean ``'ok'`` and ``'saved'`` keys, plus a ``'reason'``
        string whenever nothing was saved.
    """
    # Build a fresh dict per call: a ``{}`` default is created once and shared,
    # so anything a helper stored in it would leak into later calls.
    if options is None:
        options = {}

    logging.info("\n[%s] Fetching...", nomination_id)

    _nomination_type, number, congress = utils.split_nomination_id(nomination_id)
    if not number:
        return {'saved': False, 'ok': False, 'reason': "Couldn't parse %s" % nomination_id}

    # fetch committee name map, if it doesn't already exist
    if not utils.committee_names:
        utils.fetch_committee_names(congress, options)

    # fetch nomination details body
    body = utils.download(
        nomination_url_for(nomination_id),
        nomination_cache_for(nomination_id, "information.html"), options)

    if not body:
        return {'saved': False, 'ok': False, 'reason': "failed to download"}

    if options.get("download_only", False):
        return {'saved': False, 'ok': True, 'reason': "requested download only"}

    # TODO:
    #   detect group nominations, particularly for military promotions
    #   detect when a group nomination is split into subnominations
    #
    # Also, the splitting process is nonsense:
    # http://thomas.loc.gov/home/PN/split.htm

    # Pages for split nominations are reported as ok but are not parsed.
    if "split into two or more parts" in body:
        return {'saved': False, 'ok': True, 'reason': 'was split'}

    nomination = parse_nomination(nomination_id, body, options)
    output_nomination(nomination, options)
    return {'ok': True, 'saved': True}
Exemple #2
0
def fetch_nomination(nomination_id, options=None):
    """Download one nomination's information page, then parse and save it.

    Args:
        nomination_id: Nomination identifier; it is broken into type, number
            and congress by ``utils.split_nomination_id``.
        options: Optional dict of run options. Only ``"download_only"`` is
            read here; the dict is also forwarded to the download, parse and
            output helpers.

    Returns:
        A dict with boolean ``'ok'`` and ``'saved'`` keys, plus a ``'reason'``
        string whenever nothing was saved.
    """
    # Build a fresh dict per call: a ``{}`` default is created once and shared,
    # so anything a helper stored in it would leak into later calls.
    if options is None:
        options = {}

    logging.info("\n[%s] Fetching...", nomination_id)

    _nomination_type, number, congress = utils.split_nomination_id(nomination_id)
    if not number:
        return {'saved': False, 'ok': False, 'reason': "Couldn't parse %s" % nomination_id}

    # fetch committee name map, if it doesn't already exist
    if not utils.committee_names:
        utils.fetch_committee_names(congress, options)

    # fetch nomination details body
    body = utils.download(
        nomination_url_for(nomination_id),
        nomination_cache_for(nomination_id, "information.html"), options)

    if not body:
        return {'saved': False, 'ok': False, 'reason': "failed to download"}

    if options.get("download_only", False):
        return {'saved': False, 'ok': True, 'reason': "requested download only"}

    # TODO:
    #   detect group nominations, particularly for military promotions
    #   detect when a group nomination is split into sub nominations because
    #   of divergent Senate action

    nomination = parse_nomination(nomination_id, body, options)
    output_nomination(nomination, options)
    return {'ok': True, 'saved': True}
  def test_normal_enacted_bill(self):
    # Checks the passage, veto, signature and enactment fields of the
    # hr3590-111 fixture history.
    utils.fetch_committee_names(111, {'test': True})

    bill_history = fixtures.bill("hr3590-111")['history']

    # (history key, expected value, compare through self.to_date).
    # Rows are checked top to bottom, so the first failure is unchanged.
    expectations = (
      ('house_passage_result', 'pass', False),
      ('house_passage_result_at', "2010-03-21 22:48", True),
      ('senate_passage_result', 'pass', False),
      ('senate_passage_result_at', "2009-12-24", True),
      ('vetoed', False, False),
      ('awaiting_signature', False, False),
      ('enacted', True, False),
      ('enacted_at', "2010-03-23", True),
    )
    for key, expected, via_to_date in expectations:
      actual = bill_history[key]
      if via_to_date:
        actual = self.to_date(actual)
      self.assertEqual(actual, expected)
Exemple #4
0
    def test_senate_resolution_went_nowhere(self):
        # Expects the sres4-113 fixture history to be inactive and to carry
        # no passage or cloture keys at all.
        utils.fetch_committee_names(113, {'test': True})

        history = fixtures.bill("sres4-113")['history']

        self.assertEqual(history['active'], False)
        # dict.has_key() no longer exists in Python 3; assertNotIn works on
        # both major versions and names the offending key on failure.
        for absent_key in (
                'house_passage_result',
                'house_passage_result_at',
                'senate_cloture_result',
                'senate_cloture_result_at',
                'senate_passage_result',
                'senate_passage_result_at',
        ):
            self.assertNotIn(absent_key, history)
        self.assertEqual(history['vetoed'], False)
        self.assertEqual(history['awaiting_signature'], False)
        self.assertEqual(history['enacted'], False)
Exemple #5
0
    def test_introductory_remarks_are_still_inactive(self):
        # Expects the hr718-113 fixture history to be inactive and to carry
        # no passage or cloture keys at all.
        utils.fetch_committee_names(113, {'test': True})

        history = fixtures.bill("hr718-113")['history']

        self.assertEqual(history['active'], False)
        # dict.has_key() no longer exists in Python 3; assertNotIn works on
        # both major versions and names the offending key on failure.
        for absent_key in (
                'house_passage_result',
                'house_passage_result_at',
                'senate_cloture_result',
                'senate_cloture_result_at',
                'senate_passage_result',
                'senate_passage_result_at',
        ):
            self.assertNotIn(absent_key, history)
        self.assertEqual(history['vetoed'], False)
        self.assertEqual(history['awaiting_signature'], False)
        self.assertEqual(history['enacted'], False)
  def test_senate_resolution_went_nowhere(self):
    # Expects the sres4-113 fixture history to be inactive and to carry no
    # passage or cloture keys at all.
    utils.fetch_committee_names(113, {'test': True})

    history = fixtures.bill("sres4-113")['history']

    self.assertEqual(history['active'], False)
    # dict.has_key() no longer exists in Python 3; assertNotIn works on both
    # major versions and names the offending key on failure.
    for absent_key in (
        'house_passage_result',
        'house_passage_result_at',
        'senate_cloture_result',
        'senate_cloture_result_at',
        'senate_passage_result',
        'senate_passage_result_at',
    ):
      self.assertNotIn(absent_key, history)
    self.assertEqual(history['vetoed'], False)
    self.assertEqual(history['awaiting_signature'], False)
    self.assertEqual(history['enacted'], False)
  def test_introductory_remarks_are_still_inactive(self):
    # Expects the hr718-113 fixture history to be inactive and to carry no
    # passage or cloture keys at all.
    utils.fetch_committee_names(113, {'test': True})

    history = fixtures.bill("hr718-113")['history']

    self.assertEqual(history['active'], False)
    # dict.has_key() no longer exists in Python 3; assertNotIn works on both
    # major versions and names the offending key on failure.
    for absent_key in (
        'house_passage_result',
        'house_passage_result_at',
        'senate_cloture_result',
        'senate_cloture_result_at',
        'senate_passage_result',
        'senate_passage_result_at',
    ):
      self.assertNotIn(absent_key, history)
    self.assertEqual(history['vetoed'], False)
    self.assertEqual(history['awaiting_signature'], False)
    self.assertEqual(history['enacted'], False)
  def test_senate_resolution_failed_voice(self):
    # Expects the sres5-113 fixture history to be active with a failed
    # Senate passage result and no House passage or cloture keys.
    utils.fetch_committee_names(113, {'test': True})

    history = fixtures.bill("sres5-113")['history']

    self.assertEqual(history['active'], True)
    self.assertEqual(self.to_date(history['active_at']), "2013-01-24")
    # dict.has_key() no longer exists in Python 3; assertNotIn works on both
    # major versions and names the offending key on failure.
    for absent_key in (
        'house_passage_result',
        'house_passage_result_at',
        'senate_cloture_result',
        'senate_cloture_result_at',
    ):
      self.assertNotIn(absent_key, history)
    self.assertEqual(history['senate_passage_result'], 'fail')
    self.assertEqual(self.to_date(history['senate_passage_result_at']), "2013-01-24")
    self.assertEqual(history['vetoed'], False)
    self.assertEqual(history['awaiting_signature'], False)
    self.assertEqual(history['enacted'], False)
  def test_immediately_passed_bill(self):
    # Expects the s227-113 fixture history to be active with a passed Senate
    # result and no House passage or cloture keys.
    utils.fetch_committee_names(113, {'test': True})

    history = fixtures.bill("s227-113")['history']

    self.assertEqual(history['active'], True)
    self.assertEqual(self.to_date(history['active_at']), "2013-02-04")
    # dict.has_key() no longer exists in Python 3; assertNotIn works on both
    # major versions and names the offending key on failure.
    for absent_key in (
        'house_passage_result',
        'house_passage_result_at',
        'senate_cloture_result',
        'senate_cloture_result_at',
    ):
      self.assertNotIn(absent_key, history)
    self.assertEqual(history['senate_passage_result'], 'pass')
    self.assertEqual(self.to_date(history['senate_passage_result_at']), "2013-02-04")
    self.assertEqual(history['vetoed'], False)
    self.assertEqual(history['awaiting_signature'], False)
    self.assertEqual(history['enacted'], False)
Exemple #10
0
    def test_senate_resolution_failed_voice(self):
        # Expects the sres5-113 fixture history to be active with a failed
        # Senate passage result and no House passage or cloture keys.
        utils.fetch_committee_names(113, {'test': True})

        history = fixtures.bill("sres5-113")['history']

        self.assertEqual(history['active'], True)
        self.assertEqual(self.to_date(history['active_at']), "2013-01-24")
        # dict.has_key() no longer exists in Python 3; assertNotIn works on
        # both major versions and names the offending key on failure.
        for absent_key in (
                'house_passage_result',
                'house_passage_result_at',
                'senate_cloture_result',
                'senate_cloture_result_at',
        ):
            self.assertNotIn(absent_key, history)
        self.assertEqual(history['senate_passage_result'], 'fail')
        self.assertEqual(self.to_date(history['senate_passage_result_at']),
                         "2013-01-24")
        self.assertEqual(history['vetoed'], False)
        self.assertEqual(history['awaiting_signature'], False)
        self.assertEqual(history['enacted'], False)
Exemple #11
0
    def test_immediately_passed_bill(self):
        # Expects the s227-113 fixture history to be active with a passed
        # Senate result and no House passage or cloture keys.
        utils.fetch_committee_names(113, {'test': True})

        history = fixtures.bill("s227-113")['history']

        self.assertEqual(history['active'], True)
        self.assertEqual(self.to_date(history['active_at']), "2013-02-04")
        # dict.has_key() no longer exists in Python 3; assertNotIn works on
        # both major versions and names the offending key on failure.
        for absent_key in (
                'house_passage_result',
                'house_passage_result_at',
                'senate_cloture_result',
                'senate_cloture_result_at',
        ):
            self.assertNotIn(absent_key, history)
        self.assertEqual(history['senate_passage_result'], 'pass')
        self.assertEqual(self.to_date(history['senate_passage_result_at']),
                         "2013-02-04")
        self.assertEqual(history['vetoed'], False)
        self.assertEqual(history['awaiting_signature'], False)
        self.assertEqual(history['enacted'], False)
Exemple #12
0
    def test_normal_enacted_bill(self):
        # Checks the activity, passage, cloture, veto, signature and
        # enactment fields of the hr3590-111 fixture history.
        utils.fetch_committee_names(111, {'test': True})

        bill_history = fixtures.bill("hr3590-111")['history']

        # (history key, expected value, compare through self.to_date).
        # Rows are checked top to bottom, so the first failure is unchanged.
        expectations = (
            ('active', True, False),
            ('active_at', "2009-10-07 14:35", True),
            ('house_passage_result', 'pass', False),
            ('house_passage_result_at', "2010-03-21 22:48", True),
            ('senate_cloture_result', 'pass', False),
            ('senate_cloture_result_at', "2009-12-23", True),
            ('senate_passage_result', 'pass', False),
            ('senate_passage_result_at', "2009-12-24", True),
            ('vetoed', False, False),
            ('awaiting_signature', False, False),
            ('enacted', True, False),
            ('enacted_at', "2010-03-23", True),
        )
        for key, expected, via_to_date in expectations:
            actual = bill_history[key]
            if via_to_date:
                actual = self.to_date(actual)
            self.assertEqual(actual, expected)
Exemple #13
0
def fetch_nomination(nomination_id, options=None):
    """Download one nomination's information page, then parse and save it.

    Args:
        nomination_id: Nomination identifier; it is broken into type, number
            and congress by ``utils.split_nomination_id``.
        options: Optional dict of run options. Only ``"download_only"`` is
            read here; the dict is also forwarded to the download, parse and
            output helpers.

    Returns:
        A dict with boolean ``'ok'`` and ``'saved'`` keys, plus a ``'reason'``
        string whenever nothing was saved.
    """
    # Build a fresh dict per call: a ``{}`` default is created once and shared,
    # so anything a helper stored in it would leak into later calls.
    if options is None:
        options = {}

    logging.info("\n[%s] Fetching...", nomination_id)

    _nomination_type, number, congress = utils.split_nomination_id(
        nomination_id)
    if not number:
        return {
            'saved': False,
            'ok': False,
            'reason': "Couldn't parse %s" % nomination_id
        }

    # fetch committee name map, if it doesn't already exist
    if not utils.committee_names:
        utils.fetch_committee_names(congress, options)

    # fetch nomination details body
    body = utils.download(
        nomination_url_for(nomination_id),
        nomination_cache_for(nomination_id, "information.html"), options)

    if not body:
        return {'saved': False, 'ok': False, 'reason': "failed to download"}

    if options.get("download_only", False):
        return {
            'saved': False,
            'ok': True,
            'reason': "requested download only"
        }

    # TODO:
    #   detect group nominations, particularly for military promotions
    #   detect when a group nomination is split into sub nominations because
    #   of divergent Senate action

    nomination = parse_nomination(nomination_id, body, options)
    output_nomination(nomination, options)
    return {'ok': True, 'saved': True}
Exemple #14
0
def proc_statute_volume(path, options):
  """Convert one statute volume's MODS metadata into bill and bill-version data.

  Parses ``path + "/mods.xml"`` and, for each ``relatedItem`` whose granule
  class is PUBLICLAW, PRIVATELAW, HCONRES or SCONRES, builds a bill record
  (written through ``bill_info.output_bill`` unless the ``textversions``
  option is set) and an "enr" bill-version record (always written). When the
  granule's ``document.pdf`` exists it can also be hard-linked (``linkpdf``)
  and converted to plain text with ``pdftotext`` (``extracttext``).

  Args:
    path: Directory holding mods.xml and one sub-directory per granule
      access id.
    options: Dict of run options. Reads "textversions", "linkpdf" and
      "extracttext" here; also forwarded to the helpers.

  Returns:
    ``{'ok': True, 'saved': True}`` once every item has been visited.

  Raises:
    Exception: if ``pdftotext`` exits with a non-zero status.
  """
  mods = etree.parse(path + "/mods.xml")
  mods_ns = { "mods": "http://www.loc.gov/mods/v3" }

  # Lookup tables that are the same for every item in the volume.
  wanted_classes = ( "PUBLICLAW", "PRIVATELAW", "HCONRES", "SCONRES" )
  chambers = { "H": "House", "S": "Senate", "J": "Joint" }
  other_chamber = { "HOUSE": "s", "SENATE": "h" }

  # Load the THOMAS committee names for this Congress, which is our best
  # bet for normalizing committee names in the GPO data.
  congress = mods.find( "/mods:extension[2]/mods:congress", mods_ns ).text
  utils.fetch_committee_names(congress, options)

  logging.warning("Processing %s (Congress %s)", path, congress)

  package_id = mods.find( "/mods:extension[2]/mods:accessId", mods_ns ).text

  for bill in mods.findall( "/mods:relatedItem", mods_ns ):
    # MODS files also contain information about:
    # ['BACKMATTER', 'FRONTMATTER', 'CONSTAMEND', 'PROCLAMATION', 'REORGPLAN']
    if bill.find( "mods:extension/mods:granuleClass", mods_ns ).text not in wanted_classes:
      continue

    # Get the title and source URL (used in error messages).
    title_text = bill.find( "mods:titleInfo/mods:title", mods_ns ).text.replace( '""', '"' )
    source_url = bill.find( "mods:location/mods:url[@displayLabel='Content Detail']", mods_ns ).text

    # Bill number: exactly one primary <bill> element is required.
    bill_elements = bill.findall( "mods:extension/mods:bill[@priority='primary']", mods_ns )
    if not bill_elements:
      logging.error("No bill number identified for '%s' (%s)", title_text, source_url)
      continue
    if len(bill_elements) > 1:
      logging.error("Multiple bill numbers identified for '%s'", title_text)
      for be in bill_elements:
        # Passed as a logging argument rather than concatenated, because
        # tostring() returns bytes on Python 3 and str + bytes raises.
        logging.error("  -- %s", etree.tostring(be).strip())
      logging.error("  @ %s", source_url)
      continue

    bill_congress = bill_elements[0].attrib["congress"]
    bill_type = bill_elements[0].attrib["type"].lower()
    bill_number = bill_elements[0].attrib["number"]
    bill_id = "%s%s-%s" % (bill_type, bill_number, bill_congress)

    # Title
    titles = [{
      "title": title_text,
      "as": "enacted",
      "type": "official",
      "is_for_portion": False,
    }]

    # Subject
    descriptor = bill.find( "mods:extension/mods:descriptor", mods_ns )
    subject = descriptor.text if descriptor is not None else None

    # Committees
    committees = []
    cong_committee = bill.find( "mods:extension/mods:congCommittee", mods_ns )
    if cong_committee is not None:
      committee = chambers[cong_committee.attrib["chamber"]] + " " + cong_committee.find( "mods:name", mods_ns ).text
      committees.append( {
        "committee": committee,
        "activity": [], # XXX
        "committee_id": utils.committee_names[committee] if committee in utils.committee_names else None,
      } )

    # The 'granuleDate' is the enactment date?
    granule_date = bill.find( "mods:extension/mods:granuleDate", mods_ns ).text

    sources = [{
      "source": "statutes",
      "package_id": package_id,
      "access_id": bill.find( "mods:extension/mods:accessId", mods_ns ).text,
      "source_url": source_url,
      "volume": bill.find( "mods:extension/mods:volume", mods_ns ).text,
      "page": bill.find( "mods:part[@type='article']/mods:extent[@unit='pages']/mods:start", mods_ns ).text,
      "position": bill.find( "mods:extension/mods:pagePosition", mods_ns ).text,
    }]

    law_elements = bill.findall( "mods:extension/mods:law", mods_ns )

    # XXX: If <law> is missing, this assumes it is a concurrent resolution.
    #      This may be a problem if the code is updated to accept joint resolutions for constitutional amendments.
    # (findall() always returns a list, so only the length needs checking.)
    if len( law_elements ) != 1:
      actions = [{
        "type": "vote",
        "vote_type": "vote2",
        "where": other_chamber[bill.find( "mods:extension/mods:originChamber", mods_ns ).text],
        "result": "pass", # XXX
        "how": "unknown", # XXX
        # "text": "",
        "acted_at": granule_date, # XXX
        "status": "PASSED:CONCURRENTRES",
        "references": [], # XXX
      }]
    else:
      law_congress = law_elements[0].attrib["congress"]
      law_number = law_elements[0].attrib["number"]
      law_type = ( "private" if ( law_elements[0].attrib["isPrivate"] == "true" ) else "public" )

      # Check for typos in the metadata.
      if law_congress != bill_congress:
        logging.error("Congress mismatch for %s%s: %s or %s? (%s)", bill_type, bill_number, bill_congress, law_congress, source_url)
        continue

      actions = [{
        "congress": law_congress,
        "number": law_number,
        "type": "enacted",
        "law": law_type,
        "text": "Became %s Law No: %s-%s." % ( law_type.capitalize(), law_congress, law_number ),
        "acted_at": granule_date, # XXX
        "status": "ENACTED:SIGNED", # XXX: Check for overridden vetoes!
        "references": [], # XXX
      }]

    status, status_date = bill_info.latest_status( actions )

    bill_data = {
      'bill_id': bill_id,
      'bill_type': bill_type,
      'number': bill_number,
      'congress': bill_congress,

      'introduced_at': None, # XXX
      'sponsor': None, # XXX
      'cosponsors': [], # XXX

      'actions': actions, # XXX
      'history': bill_info.history_from_actions( actions ),
      'status': status,
      'status_at': status_date,
      'enacted_as': bill_info.slip_law_from( actions ),

      'titles': titles,
      'official_title': bill_info.current_title_for( titles, "official" ),
      'short_title': bill_info.current_title_for( titles, "short" ), # XXX
      'popular_title': bill_info.current_title_for( titles, "popular" ), # XXX

      'subjects_top_term': subject,
      'subjects': [],

      'related_bills': [], # XXX: <associatedBills> usually only lists the current bill.
      'committees': committees,
      'amendments': [], # XXX

      'sources': sources,
      'updated_at': datetime.datetime.fromtimestamp(time.time()),
    }

    # The bill record itself is skipped when only text versions are wanted.
    if not options.get('textversions', False):
      bill_info.output_bill( bill_data, options )

    # XXX: Can't use bill_versions.fetch_version() because it depends on fdsys.
    version_code = "enr"
    bill_version_id = "%s%s-%s-%s" % ( bill_type, bill_number, bill_congress, version_code )
    bill_version = {
      'bill_version_id': bill_version_id,
      'version_code': version_code,
      'issued_on': status_date,
      'urls': { "pdf": bill.find( "mods:location/mods:url[@displayLabel='PDF rendition']", mods_ns ).text },
      'sources': sources,
    }
    utils.write(
      json.dumps(bill_version, sort_keys=True, indent=2, default=utils.format_datetime),
      bill_versions.output_for_bill_version(bill_version_id)
    )

    # Process the granule PDF.
    # - Hard-link it into the right place to be seen as bill text.
    # - Run "pdftotext -layout" to convert it to plain text and save it in the bill text location.
    pdf_file = path + "/" + sources[0]["access_id"] + "/document.pdf"
    if os.path.exists(pdf_file):
      dst_path = fdsys.output_for_bill(bill_data["bill_id"], "text-versions/" + version_code, is_data_dot=False)
      if options.get("linkpdf", False):
        os.link(pdf_file, dst_path + "/document.pdf") # a good idea
      if options.get("extracttext", False):
        # NOTE(review): this progress message is logged at error level;
        # level kept as-is, confirm whether that is intended.
        logging.error("Running pdftotext on %s...", pdf_file)
        if subprocess.call(["pdftotext", "-layout", pdf_file, dst_path + "/document.txt"]) != 0:
          raise Exception("pdftotext failed on %s" % pdf_file)

  return {'ok': True, 'saved': True}
Exemple #15
0
def proc_statute(path, options):
  """Convert one statute volume's MODS metadata into bill and bill-version data.

  Parses ``path + "/mods.xml"`` and, for each ``relatedItem`` whose granule
  class is PUBLICLAW, PRIVATELAW, HCONRES or SCONRES, writes a bill record
  through ``bill_info.output_bill`` and an "enr" bill-version record through
  ``utils.write``. Items without exactly one ``<bill>`` element, or whose
  law and bill Congress numbers disagree, are logged and skipped.

  Args:
    path: Directory holding mods.xml.
    options: Dict of run options, forwarded to the helpers.

  Returns:
    ``{'ok': True, 'saved': True}`` once every item has been visited.
  """
  # Kept function-local as in the original, but imported once instead of
  # on every loop iteration.
  import json
  import bill_versions

  mods = etree.parse(path + "/mods.xml")
  mods_ns = { "mods": "http://www.loc.gov/mods/v3" }

  # Lookup tables that are the same for every item in the volume.
  wanted_classes = ( "PUBLICLAW", "PRIVATELAW", "HCONRES", "SCONRES" )
  chambers = { "H": "House", "S": "Senate", "J": "Joint" }
  other_chamber = { "HOUSE": "s", "SENATE": "h" }

  # Load the THOMAS committee names for this Congress, which is our best
  # bet for normalizing committee names in the GPO data.
  congress = mods.find( "/mods:extension[2]/mods:congress", mods_ns ).text
  utils.fetch_committee_names(congress, options)

  logging.warning("Processing %s (Congress %s)", path, congress)

  for bill in mods.findall( "/mods:relatedItem", mods_ns ):
    # MODS files also contain information about:
    # ['BACKMATTER', 'FRONTMATTER', 'CONSTAMEND', 'PROCLAMATION', 'REORGPLAN']
    # Filter first so unwanted granules are never parsed for title/subject.
    if bill.find( "mods:extension/mods:granuleClass", mods_ns ).text not in wanted_classes:
      continue

    titles = [{
      "title": bill.find( "mods:titleInfo/mods:title", mods_ns ).text.replace( '""', '"' ),
      "as": "enacted",
      "type": "official",
    }]

    descriptor = bill.find( "mods:extension/mods:descriptor", mods_ns )
    subject = descriptor.text if descriptor is not None else None

    committees = []
    cong_committee = bill.find( "mods:extension/mods:congCommittee", mods_ns )
    if cong_committee is not None:
      committee = chambers[cong_committee.attrib["chamber"]] + " " + cong_committee.find( "mods:name", mods_ns ).text
      committees.append( {
        "committee": committee,
        "activity": [], # XXX
        "committee_id": utils.committee_names[committee] if committee in utils.committee_names else None,
      } )

    # Exactly one <bill> element is required to identify the bill.
    # (findall() always returns a list, so only the length needs checking.)
    bill_elements = bill.findall( "mods:extension/mods:bill", mods_ns )
    if len( bill_elements ) != 1:
      logging.error("Could not get bill data for %s", repr(titles))
      continue

    bill_congress = bill_elements[0].attrib["congress"]
    bill_type = bill_elements[0].attrib["type"].lower()
    bill_number = bill_elements[0].attrib["number"]
    bill_id = "%s%s-%s" % (bill_type, bill_number, bill_congress)

    granule_date = bill.find( "mods:extension/mods:granuleDate", mods_ns ).text

    law_elements = bill.findall( "mods:extension/mods:law", mods_ns )

    # XXX: If <law> is missing, this assumes it is a concurrent resolution.
    #      This may be a problem if the code is updated to accept joint resolutions for constitutional amendments.
    if len( law_elements ) != 1:
      action = {
        "type": "vote",
        "vote_type": "vote2",
        "where": other_chamber[bill.find( "mods:extension/mods:originChamber", mods_ns ).text],
        "result": "pass", # XXX
        "how": "unknown", # XXX
        # "text": "",
        "acted_at": granule_date, # XXX
        "status": "PASSED:CONCURRENTRES",
        "references": [], # XXX
      }
    else:
      law_congress = law_elements[0].attrib["congress"]
      law_number = law_elements[0].attrib["number"]
      law_type = ( "private" if ( law_elements[0].attrib["isPrivate"] == "true" ) else "public" )

      # Check for typos in the metadata. This has to live in the law branch:
      # law_congress is not assigned for concurrent resolutions, so checking
      # it after the if/else raised NameError or reused the previous item's
      # value.
      if law_congress != bill_congress:
        logging.error("Congress mismatch for %s%s: %s or %s?", bill_type, bill_number, bill_congress, law_congress)
        continue

      action = {
        "congress": law_congress,
        "number": law_number,
        "type": "enacted",
        "law": law_type,
        "text": "Became %s Law No: %s-%s." % ( law_type.capitalize(), law_congress, law_number ),
        "acted_at": granule_date, # XXX
        "status": "ENACTED:SIGNED", # XXX: Check for overridden vetoes!
        "references": [], # XXX
      }

    actions = [ action ]

    status, status_date = bill_info.latest_status( actions )

    bill_data = {
      'bill_id': bill_id,
      'bill_type': bill_type,
      'number': bill_number,
      'congress': bill_congress,

      'introduced_at': None, # XXX
      'sponsor': None, # XXX
      'cosponsors': [], # XXX

      'actions': actions, # XXX
      'history': bill_info.history_from_actions( actions ),
      'status': status,
      'status_at': status_date,
      'enacted_as': bill_info.slip_law_from( actions ),

      'titles': titles,
      'official_title': bill_info.current_title_for( titles, "official" ),
      'short_title': bill_info.current_title_for( titles, "short" ), # XXX
      'popular_title': bill_info.current_title_for( titles, "popular" ), # XXX

      'subjects_top_term': subject,
      'subjects': [],

      'related_bills': [], # XXX: <associatedBills> usually only lists the current bill.
      'committees': committees,
      'amendments': [], # XXX

      'updated_at': datetime.datetime.fromtimestamp(time.time()),
    }

    bill_info.output_bill( bill_data, options )

    # XXX: Can't use bill_versions.fetch_version() because it depends on fdsys.
    version_code = "enr"
    bill_version_id = "%s%s-%s-%s" % ( bill_type, bill_number, bill_congress, version_code )
    bill_version = {
      'bill_version_id': bill_version_id,
      'version_code': version_code,
      'issued_on': status_date,
      'urls': { "pdf": bill.find( "mods:location/mods:url[@displayLabel='PDF rendition']", mods_ns ).text },
    }
    utils.write(
      json.dumps(bill_version, sort_keys=True, indent=2, default=utils.format_datetime),
      bill_versions.output_for_bill_version(bill_version_id)
    )

  return {'ok': True, 'saved': True}