Example #1
0
def run(options):
    """Work out which roll call votes to fetch and hand them to the fetcher.

    Options read here (all optional):
      vote_id    -- fetch only this vote; congress and session are parsed
                    out of the ID itself.
      congress   -- congress number; when given, ``session`` is required.
      session    -- session year; defaults to the current legislative year
                    when no ``congress`` is given.
      chamber    -- "house" or "senate"; anything else means both chambers.
      limit      -- keep only the first ``limit`` vote IDs.
      fast       -- when set, an empty ID list is reported as "nothing new"
                    instead of as an error.
      pages_only -- return once the vote IDs have been gathered, without
                    fetching the votes themselves.

    Always returns None.
    """
    vote_id = options.get('vote_id', None)

    if vote_id:
        # Only congress and session are needed here (for the log line below);
        # the chamber and number parts of the ID are not used.
        _, _, congress, session_year = utils.split_vote_id(vote_id)
        to_fetch = [vote_id]
    else:
        congress = options.get('congress', None)
        if congress:
            session_year = options.get('session', None)
            if not session_year:
                logging.error("If you provide a --congress, provide a --session year.")
                return None
        else:
            congress = utils.current_congress()
            session_year = options.get('session', str(utils.current_legislative_year()))

        chamber = options.get('chamber', None)

        if chamber == "house":
            to_fetch = vote_ids_for_house(congress, session_year, options)
        elif chamber == "senate":
            to_fetch = vote_ids_for_senate(congress, session_year, options)
        else:
            # Either helper may return a falsy value; treat that as "no votes"
            # so the two results can still be concatenated.
            house_ids = vote_ids_for_house(congress, session_year, options) or []
            senate_ids = vote_ids_for_senate(congress, session_year, options) or []
            to_fetch = house_ids + senate_ids

        if not to_fetch:
            if not options.get("fast", False):
                logging.error("Error figuring out which votes to download, aborting.")
            else:
                # logging.warn is a deprecated alias of logging.warning.
                logging.warning("No new or recent votes.")
            return None

        limit = options.get('limit', None)
        if limit:
            to_fetch = to_fetch[:int(limit)]

    if options.get('pages_only', False):
        return None

    logging.warning("Going to fetch %i votes from congress #%s session %s",
                    len(to_fetch), congress, session_year)

    utils.process_set(to_fetch, vote_info.fetch_vote, options)
Example #2
0
def run(options):
    """Decide which roll call votes to download, then process them.

    ``options`` is a dict-like object; the keys consulted are:
      vote_id    -- a single vote to fetch (congress/session come from the ID).
      congress   -- congress number; must be accompanied by ``session``.
      session    -- session year; falls back to the current legislative year
                    only when ``congress`` was not supplied.
      chamber    -- "house" or "senate"; any other value selects both.
      limit      -- truncate the list of vote IDs to this many entries.
      fast       -- when truthy, finding no votes is logged as a warning
                    rather than an error.
      pages_only -- when truthy, return before any vote is fetched.

    Returns None in every case.
    """
    vote_id = options.get('vote_id', None)

    if vote_id:
        # The first two parts of the split ID (chamber, number) are not used
        # in this function.
        _, _, congress, session_year = utils.split_vote_id(vote_id)
        to_fetch = [vote_id]
    else:
        congress = options.get('congress', None)
        if congress:
            session_year = options.get('session', None)
            if not session_year:
                logging.error("If you provide a --congress, provide a --session year.")
                return None
        else:
            congress = utils.current_congress()
            session_year = options.get('session', str(utils.current_legislative_year()))

        chamber = options.get('chamber', None)

        if chamber == "house":
            to_fetch = vote_ids_for_house(congress, session_year, options)
        elif chamber == "senate":
            to_fetch = vote_ids_for_senate(congress, session_year, options)
        else:
            # A falsy result from either chamber is replaced by an empty list
            # so that concatenation cannot fail.
            to_fetch = (
                (vote_ids_for_house(congress, session_year, options) or [])
                + (vote_ids_for_senate(congress, session_year, options) or [])
            )

        if not to_fetch:
            if options.get("fast", False):
                # logging.warning replaces the deprecated logging.warn alias.
                logging.warning("No new or recent votes.")
            else:
                logging.error("Error figuring out which votes to download, aborting.")
            return None

        limit = options.get('limit', None)
        if limit:
            to_fetch = to_fetch[:int(limit)]

    if options.get('pages_only', False):
        return None

    logging.warning("Going to fetch %i votes from congress #%s session %s",
                    len(to_fetch), congress, session_year)

    utils.process_set(to_fetch, vote_info.fetch_vote, options)
def fetch_senate_committee_meetings(committees, options):
    """Download the Senate hearings XML feed and return its meetings.

    Args:
        committees: mapping of committee code to a dict that has a
            "subcommittees" container; used to validate the codes in the feed.
        options: dict of scraper options. It is copied, and the copy is passed
            to ``utils.download`` with "binary" and "force" switched on. The
            "debug" key enables extra progress output.

    Returns:
        A list of dicts, one per valid <meeting> node, with the keys
        "chamber", "congress", "guid", "committee", "subcommittee",
        "occurs_at", "room", "topic" and "bill_ids".

    Meetings whose committee code cannot be validated are reported on stdout
    and skipped. GUIDs are reused from the previously written output file when
    a meeting with the same committee, subcommittee and time is found there.
    """
    # Load any existing meetings file so we can recycle any GUIDs.
    existing_meetings = []
    output_file = output_for("senate")
    if os.path.exists(output_file):
        # Context manager so the file handle is closed deterministically.
        with open(output_file) as existing_file:
            existing_meetings = json.load(existing_file)

    options = dict(options)  # clone, so the caller's dict is left untouched
    # NOTE(review): presumably "binary" makes utils.download return raw bytes
    # and "force" bypasses any cached copy -- confirm against utils.download.
    options["binary"] = True
    options["force"] = True

    # Compiled once up front; the pattern is the same for every meeting.
    # NOTE(review): there is no word boundary before the bill type, so text
    # such as "Items 5" also yields an "s5" match -- confirm whether intended.
    bill_number_re = re.compile(
        r"(hr|s|hconres|sconres|hjres|sjres|hres|sres)\s?(\d+)", re.I)

    meetings = []

    dom = lxml.etree.fromstring(
        utils.download(
            "http://www.senate.gov/general/committee_schedules/hearings.xml",
            "committee_schedule/senate.xml", options))

    for node in dom.xpath("meeting"):
        committee_id = str(node.xpath('string(cmte_code)'))
        if committee_id.strip() == "":
            continue  # "No committee hearings scheduled" placeholder
        occurs_at = str(node.xpath('string(date)'))
        room = str(node.xpath('string(room)'))
        topic = str(node.xpath('string(matter)'))

        occurs_at = datetime.datetime.strptime(occurs_at, "%d-%b-%Y %I:%M %p")
        occurs_at_iso = occurs_at.isoformat()
        # Collapse runs of whitespace (including newlines) to single spaces.
        topic = re.sub(r"\s+", " ", topic).strip()

        # Validate committee code.
        try:
            # A non-matching code makes re.match return None, which surfaces
            # as AttributeError on .groups().
            committee_code, subcommittee_code = re.match(
                r"(\D+)(\d+)$", committee_id).groups()
            if committee_code not in committees:
                raise ValueError(committee_code)
            # "00" denotes the full committee rather than a subcommittee.
            if subcommittee_code == "00":
                subcommittee_code = None
            if subcommittee_code and subcommittee_code not in committees[
                    committee_code]["subcommittees"]:
                raise ValueError(subcommittee_code)
        except (AttributeError, KeyError, TypeError, ValueError):
            # Narrowed from a bare except so that KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            print("Invalid committee code", committee_id)
            continue

        # See if this meeting already exists. If so, take its GUID.
        # Assume meetings are the same if they are for the same
        # committee/subcommittee and at the same time.
        for mtg in existing_meetings:
            if (mtg["committee"] == committee_code
                    and mtg.get("subcommittee", None) == subcommittee_code
                    and mtg["occurs_at"] == occurs_at_iso):
                if options.get("debug", False):
                    print("[%s] Reusing gUID." % mtg["guid"])
                guid = mtg["guid"]
                break
        else:
            # Not found, so create a new ID.
            # TODO: Can we make this a human-readable ID?
            guid = str(uuid.uuid4())

        # Scrape the topic text for mentions of bill numbers. Periods are
        # removed first so that forms like "H.R. 123" match as "HR 123".
        congress = utils.congress_from_legislative_year(
            utils.current_legislative_year(occurs_at))
        bills = [
            bill_type.lower() + bill_number + "-" + str(congress)
            for bill_type, bill_number
            in bill_number_re.findall(topic.replace(".", ""))
        ]

        # Create the meeting event.
        if options.get("debug", False):
            print("[senate][%s][%s] Found meeting in room %s at %s." %
                  (committee_code, subcommittee_code, room, occurs_at_iso))

        meetings.append({
            "chamber": "senate",
            "congress": congress,
            "guid": guid,
            "committee": committee_code,
            "subcommittee": subcommittee_code,
            "occurs_at": occurs_at_iso,
            "room": room,
            "topic": topic,
            "bill_ids": bills,
        })

    print("[senate] Found %i meetings." % len(meetings))
    return meetings
Example #4
0
def fetch_senate_committee_meetings(existing_meetings, committees, options):
  # Parse the Senate committee meeting XML feed for meetings.
  # To aid users of the data, attempt to assign GUIDs to meetings.

  options = dict(options) # clone
  options["binary"] = True

  meetings = []

  dom = lxml.etree.fromstring(utils.download(
    "http://www.senate.gov/general/committee_schedules/hearings.xml",
    "committee_schedule/senate.xml",
    options))

  for node in dom.xpath("meeting"):
    committee_id = unicode(node.xpath('string(cmte_code)'))
    if committee_id.strip() == "": continue # "No committee hearings scheduled" placeholder
    occurs_at = unicode(node.xpath('string(date)'))
    room = unicode(node.xpath('string(room)'))
    topic = unicode(node.xpath('string(matter)'))

    occurs_at = datetime.datetime.strptime(occurs_at, "%d-%b-%Y %I:%M %p")
    topic = re.sub(r"\s+", " ", topic).strip()

    # Validate committee code.
    try:
      committee_code, subcommittee_code = re.match(r"(\D+)(\d+)$", committee_id).groups()
      if committee_code not in committees: raise ValueError(committee_code)
      if subcommittee_code == "00": subcommittee_code = None
      if subcommittee_code and subcommittee_code not in committees[committee_code]["subcommittees"]: raise ValueError(subcommittee_code)
    except:
      print "Invalid committee code", committee_id
      continue

    # See if this meeting already exists. If so, take its GUID.
    # Assume meetings are the same if they are for the same committee/subcommittee and
    # at the same time.
    for mtg in existing_meetings:
      if mtg["committee"] == committee_code and mtg.get("subcommittee", None) == subcommittee_code and mtg["occurs_at"] == occurs_at.isoformat():
        guid = mtg["guid"]
        break
    else:
      # Not found, so create a new ID.
      guid = unicode(uuid.uuid4())

    # Scrape the topic text for mentions of bill numbers.
    congress = utils.congress_from_legislative_year(utils.current_legislative_year(occurs_at))
    bills = []
    bill_number_re = re.compile(r"(hr|s|hconres|sconres|hjres|sjres|hres|sres)\s?(\d+)", re.I)
    for bill_match in bill_number_re.findall(topic.replace(".", "")):
      bills.append( bill_match[0].lower() + bill_match[1] + "-" + str(congress) )

    # Create the meeting event.
    meetings.append({
      "chamber": "senate",
      "congress": congress,
      "guid": guid,
      "committee": committee_code,
      "subcommittee": subcommittee_code,
      "occurs_at": occurs_at.isoformat(),
      "room": room,
      "topic": topic,
      "bills": bills,
    })

  return meetings
def fetch_senate_committee_meetings(existing_meetings, committees, options):
    # Parse the Senate committee meeting XML feed for meetings.
    # To aid users of the data, attempt to assign GUIDs to meetings.

    options = dict(options)  # clone
    options["binary"] = True

    meetings = []

    dom = lxml.etree.fromstring(
        utils.download(
            "http://www.senate.gov/general/committee_schedules/hearings.xml",
            "committee_schedule/senate.xml", options))

    for node in dom.xpath("meeting"):
        committee_id = unicode(node.xpath('string(cmte_code)'))
        if committee_id.strip() == "":
            continue  # "No committee hearings scheduled" placeholder
        occurs_at = unicode(node.xpath('string(date)'))
        room = unicode(node.xpath('string(room)'))
        topic = unicode(node.xpath('string(matter)'))

        occurs_at = datetime.datetime.strptime(occurs_at, "%d-%b-%Y %I:%M %p")
        topic = re.sub(r"\s+", " ", topic).strip()

        # Validate committee code.
        try:
            committee_code, subcommittee_code = re.match(
                r"(\D+)(\d+)$", committee_id).groups()
            if committee_code not in committees:
                raise ValueError(committee_code)
            if subcommittee_code == "00": subcommittee_code = None
            if subcommittee_code and subcommittee_code not in committees[
                    committee_code]["subcommittees"]:
                raise ValueError(subcommittee_code)
        except:
            print "Invalid committee code", committee_id
            continue

        # See if this meeting already exists. If so, take its GUID.
        # Assume meetings are the same if they are for the same committee/subcommittee and
        # at the same time.
        for mtg in existing_meetings:
            if mtg["committee"] == committee_code and mtg.get(
                    "subcommittee", None) == subcommittee_code and mtg[
                        "occurs_at"] == occurs_at.isoformat():
                guid = mtg["guid"]
                break
        else:
            # Not found, so create a new ID.
            guid = unicode(uuid.uuid4())

        # Scrape the topic text for mentions of bill numbers.
        congress = utils.congress_from_legislative_year(
            utils.current_legislative_year(occurs_at))
        bills = []
        bill_number_re = re.compile(
            r"(hr|s|hconres|sconres|hjres|sjres|hres|sres)\s?(\d+)", re.I)
        for bill_match in bill_number_re.findall(topic.replace(".", "")):
            bills.append(bill_match[0].lower() + bill_match[1] + "-" +
                         str(congress))

        # Create the meeting event.
        meetings.append({
            "chamber": "senate",
            "congress": congress,
            "guid": guid,
            "committee": committee_code,
            "subcommittee": subcommittee_code,
            "occurs_at": occurs_at.isoformat(),
            "room": room,
            "topic": topic,
            "bills": bills,
        })

    return meetings
def fetch_senate_committee_meetings(committees, options):
    # Load any existing meetings file so we can recycle any GUIDs.
    existing_meetings = []
    output_file = output_for("senate")
    if os.path.exists(output_file):
        existing_meetings = json.load(open(output_file))

    options = dict(options)  # clone
    options["binary"] = True  #
    options["force"] = True

    meetings = []

    dom = lxml.etree.fromstring(
        utils.download(
            "http://www.senate.gov/general/committee_schedules/hearings.xml", "committee_schedule/senate.xml", options
        )
    )

    for node in dom.xpath("meeting"):
        committee_id = unicode(node.xpath("string(cmte_code)"))
        if committee_id.strip() == "":
            continue  # "No committee hearings scheduled" placeholder
        occurs_at = unicode(node.xpath("string(date)"))
        room = unicode(node.xpath("string(room)"))
        topic = unicode(node.xpath("string(matter)"))

        occurs_at = datetime.datetime.strptime(occurs_at, "%d-%b-%Y %I:%M %p")
        topic = re.sub(r"\s+", " ", topic).strip()

        # Validate committee code.
        try:
            committee_code, subcommittee_code = re.match(r"(\D+)(\d+)$", committee_id).groups()
            if committee_code not in committees:
                raise ValueError(committee_code)
            if subcommittee_code == "00":
                subcommittee_code = None
            if subcommittee_code and subcommittee_code not in committees[committee_code]["subcommittees"]:
                raise ValueError(subcommittee_code)
        except:
            print "Invalid committee code", committee_id
            continue

        # See if this meeting already exists. If so, take its GUID.
        # Assume meetings are the same if they are for the same committee/subcommittee and
        # at the same time.
        for mtg in existing_meetings:
            if (
                mtg["committee"] == committee_code
                and mtg.get("subcommittee", None) == subcommittee_code
                and mtg["occurs_at"] == occurs_at.isoformat()
            ):
                if options.get("debug", False):
                    print "[%s] Reusing gUID." % mtg["guid"]
                guid = mtg["guid"]
                break
        else:
            # Not found, so create a new ID.
            # TODO: Can we make this a human-readable ID?
            guid = unicode(uuid.uuid4())

        # Scrape the topic text for mentions of bill numbers.
        congress = utils.congress_from_legislative_year(utils.current_legislative_year(occurs_at))
        bills = []
        bill_number_re = re.compile(r"(hr|s|hconres|sconres|hjres|sjres|hres|sres)\s?(\d+)", re.I)
        for bill_match in bill_number_re.findall(topic.replace(".", "")):
            bills.append(bill_match[0].lower() + bill_match[1] + "-" + str(congress))

        # Create the meeting event.
        if options.get("debug", False):
            print "[senate][%s][%s] Found meeting in room %s at %s." % (
                committee_code,
                subcommittee_code,
                room,
                occurs_at.isoformat(),
            )

        meetings.append(
            {
                "chamber": "senate",
                "congress": congress,
                "guid": guid,
                "committee": committee_code,
                "subcommittee": subcommittee_code,
                "occurs_at": occurs_at.isoformat(),
                "room": room,
                "topic": topic,
                "bill_ids": bills,
            }
        )

    print "[senate] Found %i meetings." % len(meetings)
    return meetings