def get_transparency_stats(person, role, stats, congress, startdate, enddate):
	"""Record the person's activity on the tracked list of transparency bills.

	Writes a "transparency-bills" entry into ``stats`` holding the bills the
	person sponsored (introduced between ``startdate`` and ``enddate``) and
	cosponsored (joined between those dates), a score that weights each
	sponsored bill three times as much as a cosponsored one, the number of
	tracked bills that belong to the chamber of ``role``, and that chamber.

	The bill list is read from analysis/transparency-bills.txt on first use and
	kept in the module-level ``transparency_bills``.
	NOTE(review): the cache only holds bills of the ``congress`` passed on the
	call that loaded it; later calls with a different congress reuse that list.
	"""
	global transparency_bills
	if not transparency_bills:
		# Build the list locally and publish it only once the whole file has
		# been read, so a failure part-way through does not leave a partial
		# (but truthy) cache behind. The file is closed when the block exits.
		loaded = []
		with open("analysis/transparency-bills.txt") as f:
			for line in f:
				# The bill id is everything up to the first whitespace character.
				bill = Bill.from_congressproject_id(re.split(r"\s", line)[0])
				if bill.congress != congress: continue
				loaded.append(bill)
		transparency_bills = loaded

	chamber = RoleType.by_value(role.role_type).congress_chamber

	# which bills are in the right chamber?
	plausible_bills = [
		bill for bill in transparency_bills
		if BillType.by_value(bill.bill_type).chamber == chamber
	]

	# did person sponsor any of these within this session?
	sponsored = [
		bill for bill in transparency_bills
		if startdate <= bill.introduced_date <= enddate and bill.sponsor == person
	]

	# did person cosponsor any of these within this session?
	cosponsored = [
		cosp.bill
		for cosp in Cosponsor.objects.filter(person=person, bill__in=transparency_bills, joined__gte=startdate, joined__lte=enddate)
	]

	stats["transparency-bills"] = {
		"value": len(sponsored)*3 + len(cosponsored),
		"sponsored": make_bill_entries(sponsored),
		"cosponsored": make_bill_entries(cosponsored),
		"num_bills": len(plausible_bills),
		"chamber": chamber,
	}
def get_sponsorship_analysis_stats(person, role, stats):
	"""Copy the person's ideology and leadership scores into ``stats``.

	The scores are looked up in the module-level ``sponsorship_analysis_data``
	by (role type, person id). The first element of the stored value becomes
	the "ideology" value and the second the "leadership" value; both are None
	when there is no (or an empty) entry for the person.
	"""
	scores = sponsorship_analysis_data.get( (role.role_type, person.id) )
	for stat_key, position in (("ideology", 0), ("leadership", 1)):
		if scores:
			score = scores[position]
		else:
			score = None
		stats[stat_key] = {
			"value": score,
			"role": RoleType.by_value(role.role_type).key,
		}
Exemple #3
0
 def fmt_role(r):
     """Return a dict summarizing role ``r`` and the person who holds it."""
     person = r.person
     summary = {}
     summary["id"] = person.id
     summary["name"] = person.name_and_title()
     summary["link"] = person.get_absolute_url()
     summary["type"] = RoleType.by_value(r.role_type).key
     summary["pronoun"] = Gender.by_value(person.gender).pronoun
     summary["key_vote"] = get_key_vote(person)
     return summary
def get_cohorts(person, role, congress, session, committee_membership):
	"""Return the cohorts (peer groups) that ``person`` belongs to in ``role``.

	Each cohort is a dict with a "key" naming the group plus extra fields
	describing it. Cohorts cover chamber, party, House state delegation,
	chamber leadership, freshmen/sophomores/ten-year members, committee
	leaders and, for representatives, competitive vs. safe seats.

	``committee_membership`` maps a person id to a dict of committee code ->
	committee role. ``session`` selects which
	analysis/cook_competitive_seats-<session>.txt file is read.
	NOTE(review): the competitive seats are cached in the module-level
	``competitive_seats`` on first use, so a later call with a different
	``session`` reuses the first file's data.
	"""
	global competitive_seats
	cohorts = []

	# chamber
	chamber = RoleType.by_value(role.role_type).congress_chamber.lower()
	cohorts.append({ "key": chamber })

	# party
	cohorts.append({ "key": "party-%s-%s" % (chamber, role.party.lower()), "chamber": chamber, "party": role.party })

	# state delegation
	if role.role_type == RoleType.representative:
		cohorts.append({ "key": "house-state-delegation-" + role.state.lower(), "state": role.state })

	# chamber leadership position
	if role.leadership_title:
		cohorts.append({ "key": chamber + "-leadership", "position": role.leadership_title })

	# freshmen/sophomores
	min_start_date = None
	prev_congresses_served = set()
	# use enddate__lte=enddate to include the current role itself since for
	# senators their current role may span previous congresses
	for r in PersonRole.objects.filter(person=person, role_type=role.role_type, enddate__lte=role.enddate):
		if not min_start_date or r.startdate < min_start_date: min_start_date = r.startdate
		prev_congresses_served.update(c for c in r.congress_numbers() if c < congress)
	if len(prev_congresses_served) == 0: cohorts.append({ "key": chamber + "-freshmen", "chamber": chamber })
	if len(prev_congresses_served) == 1: cohorts.append({ "key": chamber + "-sophomores", "chamber": chamber })
	if min_start_date and (role.enddate - min_start_date).days > 365.25*10: cohorts.append({ "key": chamber + "-tenyears", "chamber": chamber, "first_date": min_start_date.isoformat()  })

	# committee leadership positions
	leader_roles = (CommitteeMemberRole.ranking_member, CommitteeMemberRole.vice_chairman, CommitteeMemberRole.chairman)
	committee_positions = [
		(committee, committee_role)
		for committee, committee_role in committee_membership.get(person.id, {}).items()
		if len(committee) == 4 # exclude subcommittees
		and committee_role in leader_roles
	]
	if len(committee_positions) > 0:
		cohorts.append({ "key": chamber + "-committee-leaders", "chamber": chamber, "positions": committee_positions })

	# safe vs competitive seats
	if role.role_type == RoleType.representative:
		if competitive_seats is None:
			# Parse into a local set and publish it only after the whole file
			# has been read, so a malformed line cannot leave a half-filled
			# cache that later calls would silently trust.
			seats = set()
			with open("analysis/cook_competitive_seats-%s.txt" % session) as f:
				for line in f:
					# Skip blank lines and "#" comment lines.
					if not line.strip() or line[0] == "#": continue
					# The first space-delimited token is "<state>-<district>".
					(state, district) = line.split(" ")[0].split("-")
					seats.add( (state, int(district)) )
			competitive_seats = seats
		if (role.state, role.district) in competitive_seats:
			cohorts.append({ "key": chamber + "-competitive-seat" })
		else:
			cohorts.append({ "key": chamber + "-safe-seat" })

	return cohorts
def get_vote_stats(person, role, stats, votes_this_year):
	"""Store the person's missed-vote percentage under ``stats["missed-votes"]``.

	Only votes listed in ``votes_this_year`` for the role's type are counted,
	i.e. votes in the chamber the Member is currently serving in. Nothing is
	recorded when the role's leadership title is "Speaker". The percentage is
	None when the person had no votes to take part in.
	"""
	if role.leadership_title == "Speaker":
		return

	eligible_qs = Voter.objects.filter(person=person, vote__in=votes_this_year[role.role_type])
	missed_qs = eligible_qs.filter(option__key="0")
	num_eligible = eligible_qs.count()
	num_missed = missed_qs.count()

	if num_eligible > 0:
		missed_pct = round(100.0*num_missed/num_eligible, 3)
	else:
		missed_pct = None

	stats["missed-votes"] = {
		"value": missed_pct,
		"elligible": num_eligible,
		"missed": num_missed,
		"role": RoleType.by_value(role.role_type).key,
	}
def get_cohorts(person, role, congress, session, committee_membership):
	"""Return the cohorts (peer groups) that ``person`` belongs to in ``role``.

	Each cohort is a dict with a "key" naming the group plus extra fields
	describing it. Cohorts cover chamber, party, House state delegation,
	chamber leadership and freshmen/sophomores/ten-year members.

	``session`` is the session year as a string (e.g. "2014"); it selects the
	hand-maintained freshman/sophomore overrides below.
	``committee_membership`` is accepted for interface compatibility but is
	not used by this version.
	"""
	cohorts = []

	# chamber
	chamber = RoleType.by_value(role.role_type).congress_chamber.lower()
	cohorts.append({ "key": chamber })

	# party
	cohorts.append({ "key": "party-%s-%s" % (chamber, role.party.lower()), "chamber": chamber, "party": role.party })

	# state delegation
	if role.role_type == RoleType.representative:
		cohorts.append({ "key": "house-state-delegation-" + role.state.lower(), "state": role.state })

	# chamber leadership position
	if role.leadership_title:
		cohorts.append({ "key": chamber + "-leadership", "position": role.leadership_title })

	# freshmen/sophomores
	min_start_date = None
	years_served = 0
	prev_congresses_served = set()
	# Take "today" once so every role is capped against the same date.
	today = datetime.datetime.now().date()
	# use enddate__lte=enddate to include the current role itself since for
	# senators their current role may span previous congresses
	for r in PersonRole.objects.filter(person=person, role_type=role.role_type, enddate__lte=role.enddate):
		if not min_start_date or r.startdate < min_start_date: min_start_date = r.startdate
		# end dates for senators may be far in the future; round because terms may be slightly less than a year
		years_served += round( (min(r.enddate, today)-r.startdate).days / 365.25 )
		for c in r.congress_numbers():
			if c < congress:
				prev_congresses_served.add(c)

	# Members who took office only shortly before a Congress ended would
	# otherwise count that Congress as full prior service. For each group:
	#   (person ids, sessions in which they are still treated as freshmen,
	#    sessions in which they are treated as sophomores,
	#    the Congress in which they briefly served first)
	late_arrival_overrides = (
		# Schatz served only a few days in the 112th Congress. Other members took office
		# within the last two months of the 112th Congress. For the 2013 stats, I originally
		# classified them as sophomores. I revised it to drop that cohort from Schatz after
		# hearing from his office. For 2014 stats, I am classifying them all as a freshman
		# rather than sophomores; for 2015/2016 I am classifying them as sophomores.
		((412505, 412503, 412506, 412507), ("2014",), ("2015", "2016"), 112),
		# Similarly, Dave Brat (412605), Donald Norcross (412606), and Alma Adams (412607)
		# took office on 2014-11-12, but we'll treat them as freshman in the 114th Congress
		# rather than sophomores.
		((412605, 412606, 412607), ("2015", "2016"), ("2017", "2018"), 113),
		# Similarly, James Comer, Dwight Evans took office only after the 2016 election, so we'll
		# keep them as a freshman again in 2017, 2018 and then sophomores in 2019, 2020.
		# NOTE(review): the same id is listed twice although two members are named;
		# one entry is presumably meant to be the other member's id -- confirm and fix.
		((412676, 412676), ("2017", "2018"), ("2019", "2020"), 114),
		# Similarly, Kevin Hern, Joseph D. Morelle, Mary Gay Scanlon, Susan Wild took office only
		# after the 2018 election, so we'll keep them as a freshman again in 2019, 2020 and then
		# sophomores in 2021, 2022.
		((412748, 412749, 412750, 412751), ("2019", "2020"), ("2021", "2022"), 115),
	)
	for person_ids, freshman_sessions, sophomore_sessions, first_congress in late_arrival_overrides:
		if person.id not in person_ids:
			continue
		if session in freshman_sessions:
			# Sanity check that the data still looks the way the override expects.
			assert prev_congresses_served == { first_congress }
			prev_congresses_served = set()
		elif session in sophomore_sessions:
			assert prev_congresses_served == { first_congress, first_congress + 1 }
			prev_congresses_served = { first_congress + 1 }

	if len(prev_congresses_served) == 0: cohorts.append({ "key": chamber + "-freshmen", "chamber": chamber })
	if len(prev_congresses_served) == 1: cohorts.append({ "key": chamber + "-sophomores", "chamber": chamber })
	if years_served >= 10: cohorts.append({ "key": chamber + "-tenyears", "chamber": chamber, "first_date": min_start_date.isoformat()  })

	return cohorts
def get_sponsor_stats(person, role, stats, congress, startdate, enddate, committee_membership):
	"""Compute statistics about the bills ``person`` sponsored in a time window.

	Considers bills of ``congress`` sponsored by ``person`` and introduced
	between ``startdate`` and ``enddate``, and writes these entries into
	``stats``: "bills-introduced", "bills-enacted-ti", "bills-reported",
	"bills-with-committee-leaders", "bills-with-cosponsor-other-party" and
	"bills-with-companion".

	``committee_membership`` maps a person id to a dict of committee code ->
	committee role; it is used to find cosponsors who lead a committee the
	bill was referred to.
	"""
	# How many bills did the Member introduce during this time window?
	bills = Bill.objects.filter(sponsor=person, congress=congress,
		introduced_date__gte=startdate, introduced_date__lte=enddate)
	stats["bills-introduced"] = {
		"value": bills.count(),
	}

	# How many bills were "enacted" within this time window? Follow the was_enacted_ex logic
	# which includes text incorporation. Only use the date range on single-year stats because
	# a bill might be enacted after the end of a Congress. Don't count the bill itself if it became
	# the vehicle for passage of an unrelated matter, because the original sponsor has nothing to
	# do with that and shouldn't get credit. But do count it if it was incorporated into a bill
	# that was a vehicle for passage.
	def was_bill_enacted(b, startdate, enddate):
		if (enddate-startdate).days > 365*1.5:
			bs = b.was_enacted_ex()
		else:
			bs = b.was_enacted_ex(restrict_to_activity_in_date_range=(startdate, enddate))
		for b1 in (bs or []):
			if b1 == b and b1.original_intent_replaced: continue
			return True # i.e. was enacted via any bill that isn't itself and itself was a vehicle
		return False
	bills_enacted = [b for b in bills if was_bill_enacted(b, startdate, enddate)]
	stats["bills-enacted-ti"] = {
		"value": len(bills_enacted),
		"bills": make_bill_entries(bills_enacted),
	}

	# In order to test these remaining factors, we have to look more closely
	# at the bill because we can only use activities that occurred during the
	# time window so that if we re-run this script on the same window at a
	# later date nothing changes -- i.e. future activity on bills should
	# not affect 1st Session statistics. Mostly.
	bills = list(bills)
	was_reported = []
	has_cmte_leaders = []
	has_bipartisan_cosponsor = []
	has_companion = []
	for bill in bills:
		# Skip any bills that were the vehicle for passage of an unrelated matter
		# (and the original matter is gone).
		if bill.original_intent_replaced:
			continue

		# Check if the bill was reported during this time period.
		# We'll consider a bill reported if it had any major status change,
		# since a report per se doesn't always occur. Find the first
		# major status change and see if that occurred in the time
		# window of these stats.
		first_major_status_change = None
		for datestr, st, text, srcxml in bill.major_actions:
			if st != BillStatus.introduced:
				# NOTE(review): eval() executes whatever expression is stored in
				# major_actions. Left as-is because the stored format is not
				# visible here; replace with a real date parser once confirmed.
				d = eval(datestr)
				if isinstance(d, datetime.datetime): d = d.date()
				first_major_status_change = d
				break
		# Also give credit if the bill's provisions were enacted via
		# other legislation during this time period.
		if (first_major_status_change and (startdate <= first_major_status_change <= enddate)) \
			or bill in bills_enacted:
			was_reported.append(bill)

		# Check whether any cosponsors are on relevant committees.
		# Warning: Committee membership data is volatile, so re-running the stats may come out different.
		cosponsors = list(Cosponsor.objects.filter(bill=bill, joined__gte=startdate, joined__lte=enddate).select_related("person", "role"))
		if any(
			committee_membership.get(cosponsor.person.id, {}).get(committee.code)
				in (CommitteeMemberRole.ranking_member, CommitteeMemberRole.vice_chair, CommitteeMemberRole.chair)
			for committee in bill.committees.all()
			for cosponsor in cosponsors
		):
			has_cmte_leaders.append(bill)

		# Check whether there's a cosponsor from the opposite party of the sponsor.
		# For independents, use the party they caucus with.
		# Warning: If someone switches parties, this will change.
		sponsor_party = bill.sponsor_role.caucus or bill.sponsor_role.party
		assert sponsor_party in ("Republican", "Democrat")
		for cosponsor in cosponsors:
			cosponsor_party = cosponsor.role.caucus or cosponsor.role.party
			assert cosponsor_party in ("Republican", "Democrat")
			if cosponsor_party != sponsor_party:
				has_bipartisan_cosponsor.append(bill)
				break

		# Check if a companion bill was introduced during the time period.
		if RelatedBill.objects.filter(bill=bill, relation="identical", related_bill__introduced_date__gte=startdate, related_bill__introduced_date__lte=enddate).exists():
			has_companion.append(bill)


	stats["bills-reported"] = {
		"value": len(was_reported),
		"bills": make_bill_entries(was_reported),
	}

	stats["bills-with-committee-leaders"] = {
		"value": len(has_cmte_leaders),
		"bills": make_bill_entries(has_cmte_leaders),
	}

	stats["bills-with-cosponsor-other-party"] = {
		"value": len(has_bipartisan_cosponsor),
		"bills": make_bill_entries(has_bipartisan_cosponsor),
		"num_bills": len(bills),
	}

	stats["bills-with-companion"] = {
		"value": len(has_companion),
		"other_chamber": RoleType.by_value(role.role_type).congress_chamber_other,
		"bills": make_bill_entries(has_companion),
	}
Exemple #8
0
def main(options):
    """
    Update Person and PersonRole models from the legislators YAML files.

    Do safe update: touch only those records
    which have been changed.

    ``options`` is read for two attributes: ``force`` (run even when no
    source file changed, and save every record whether or not it differs)
    and ``disable_events`` (skip creating role events).
    """

    BASE_PATH = CONGRESS_LEGISLATORS_PATH
    SRC_FILES = [
        'legislators-current', 'legislators-historical',
        'legislators-social-media', 'executive'
    ]  # order matters

    # Bail out early unless at least one source file changed (or --force).
    for p in SRC_FILES:
        f = BASE_PATH + p + ".yaml"
        if not File.objects.is_changed(f) and not options.force:
            log.info('File %s was not changed' % f)
        else:
            # file modified...
            break
    else:
        # no 'break' ==> no files modified
        return

    # Start parsing.

    had_error = False

    # Get combined data.
    legislator_data = {}  # GovTrack id -> merged YAML record
    leg_id_map = {}  # (id scheme, id value) -> GovTrack id
    for p in SRC_FILES:
        log.info('Opening %s...' % p)
        f = BASE_PATH + p + ".yaml"
        y = yaml_load(f)
        for m in y:
            if p == "legislators-current":
                # We know all terms but the last are non-current and the last is.
                for r in m["terms"]:
                    r["current"] = False
                m["terms"][-1]["current"] = True
            elif p == "legislators-historical":
                # We know all terms are non-current.
                for r in m["terms"]:
                    r["current"] = False

            if p != 'legislators-social-media':
                govtrack_id = m["id"].get("govtrack")

                # For the benefit of the social media file, make a mapping of IDs.
                for k, v in m["id"].items():
                    if type(v) != list:
                        leg_id_map[(k, v)] = govtrack_id
            else:
                # GovTrack IDs are not always listed in this file.
                govtrack_id = None
                for k, v in m["id"].items():
                    if type(v) != list and (k, v) in leg_id_map:
                        govtrack_id = leg_id_map[(k, v)]
                        break

            if not govtrack_id:
                print("No GovTrack ID:")
                pprint.pprint(m)
                had_error = True
                continue

            # Merge records that share a GovTrack id: social media data is
            # attached to the existing record and executive terms are appended
            # to it. Any other duplicate is an error.
            if govtrack_id not in legislator_data:
                legislator_data[govtrack_id] = m
            elif p == "legislators-social-media":
                legislator_data[govtrack_id]["social"] = m["social"]
            elif p == "executive":
                legislator_data[govtrack_id]["terms"].extend(m["terms"])
            else:
                raise ValueError("Duplication in an unexpected way (%d, %s)." %
                                 (govtrack_id, p))

    person_processor = PersonProcessor()
    role_processor = PersonRoleProcessor()

    existing_persons = set(Person.objects.values_list('pk', flat=True))
    processed_persons = set()
    created_persons = set()

    progress = Progress(total=len(legislator_data))
    log.info('Processing persons')

    for node in legislator_data.values():
        # Wrap each iteration in try/except
        # so that if some node breaks the parsing process
        # then other nodes could be parsed
        try:
            person = person_processor.process(Person(), node)

            # Create cached name strings. This is done again later
            # after the roles are updated.
            person.set_names()

            # Now try to load the person with such ID from
            # database. If found it then just update it
            # else create new Person object
            try:
                ex_person = Person.objects.get(pk=person.pk)
                if person_processor.changed(ex_person,
                                            person) or options.force:
                    # If the person has PK of existing record,
                    # coming in via the YAML-specified GovTrack ID,
                    # then Django ORM will update existing record
                    if not options.force:
                        log.warn("Updated %s" % person)
                    person.save()

            except Person.DoesNotExist:
                created_persons.add(person.pk)
                person.save()
                log.warn("Created %s" % person)

            processed_persons.add(person.pk)

            # Parse all of the roles.
            new_roles = []
            for termnode in node['terms']:
                role = role_processor.process(PersonRole(), termnode)
                role.person = person
                role.extra = filter_yaml_term_structure(
                    termnode)  # copy in the whole YAML structure

                # Is this role current? For legislators, same as whether it came from legislators-current, which eases Jan 3 transitions when we can't distinguish by date.
                if "current" in termnode:
                    role.current = termnode["current"]

                # But executives...
                else:
                    now = datetime.now().date()
                    role.current = role.startdate <= now and role.enddate >= now
                    # Because of date overlaps at noon transition dates, ensure that only the last term that covers
                    # today is current --- reset past roles to not current. Doesn't handle turning off retiring people tho.
                    for r in new_roles:
                        r.current = False

                # Scan for most recent leadership role within the time period of this term,
                # which isn't great for Senators because it's likely it changed a few times
                # within a term, especially if there was a party switch.
                role.leadership_title = None
                for leadership_node in node.get("leadership_roles", []):
                    # must match on date and chamber
                    # (the YAML dates are compared as ISO-format strings)
                    if leadership_node["start"] >= role.enddate.isoformat():
                        continue  # might start on the same day but is for the next Congress
                    if "end" in leadership_node and leadership_node[
                            "end"] <= role.startdate.isoformat():
                        continue  # might start on the same day but is for the previous Congress
                    if leadership_node["chamber"] != RoleType.by_value(
                            role.role_type).congress_chamber.lower():
                        continue
                    # No break: the last matching leadership node wins.
                    role.leadership_title = leadership_node["title"]

                new_roles.append(role)

            # Try matching the new roles to existing db records. Since we don't have a primary key
            # in the source data, we have to match on the record values. But because of errors in data,
            # term start/end dates can change, so matching has to be a little fuzzy.
            existing_roles = list(PersonRole.objects.filter(person=person))
            matches = []

            def run_match_rule(rule):
                # Pair up still-unmatched new and existing roles that have the
                # same role type and state and satisfy rule(new, existing).
                # Matched roles are moved out of new_roles/existing_roles into
                # matches, so each role is matched at most once.
                import itertools
                for new_role, existing_role in itertools.product(
                        new_roles, existing_roles):
                    if new_role not in new_roles or existing_role not in existing_roles:
                        continue  # already matched on a previous iteration
                    if new_role.role_type != existing_role.role_type: continue
                    if new_role.state != existing_role.state: continue
                    if rule(new_role, existing_role):
                        matches.append((new_role, existing_role))
                        new_roles.remove(new_role)
                        existing_roles.remove(existing_role)

            # First match exactly, then exact on just one date, then on contractions and expansions.
            run_match_rule(lambda new_role, existing_role: new_role.startdate
                           == existing_role.startdate and new_role.enddate ==
                           existing_role.enddate)
            run_match_rule(lambda new_role, existing_role: new_role.startdate
                           == existing_role.startdate or new_role.enddate ==
                           existing_role.enddate)
            run_match_rule(lambda new_role, existing_role: new_role.startdate
                           >= existing_role.startdate and new_role.enddate <=
                           existing_role.enddate)
            run_match_rule(lambda new_role, existing_role: new_role.startdate
                           <= existing_role.startdate and new_role.enddate >=
                           existing_role.enddate)

            # Update the database entries that correspond with records in the data file.
            did_update_any = False
            for new_role, existing_role in matches:
                # Reusing the existing id makes save() update that row.
                new_role.id = existing_role.id
                if role_processor.changed(existing_role,
                                          new_role) or options.force:
                    new_role.save()
                    did_update_any = True
                    if not options.force:
                        log.warn("Updated %s" % new_role)

            # If we have unmatched records on disk and also unmatched records in the database,
            # then we don't know how to align them.
            if len(new_roles) > 0 and len(existing_roles) > 0:
                print(new_roles)
                print(existing_roles)
                raise Exception("There is an unmatched role.")

            # Otherwise if there are any unmatched new roles, we can just add them.
            for role in new_roles:
                log.warn("Created %s" % role)
                role.save()
                did_update_any = True

            # And likewise for any existing roles that are left over.
            for pr in existing_roles:
                print(pr.person.id, pr)
                # The raise makes the two lines after it unreachable, so a
                # leftover role is reported as an error and never deleted
                # (apparently a deliberate safeguard).
                raise ValueError("Deleted role??")
                log.warn("Deleted %s" % pr)
                pr.delete()

            if did_update_any and not options.disable_events:
                # Create the events for the roles after all have been loaded
                # because we don't create events for ends of terms and
                # starts of terms that are adjacent. Refresh the list to get
                # the roles in order.
                role_list = list(
                    PersonRole.objects.filter(
                        person=person).order_by('startdate'))
                for i in range(len(role_list)):
                    # Pass the previous and next role (None at either end).
                    role_list[i].create_events(
                        role_list[i - 1] if i > 0 else None,
                        role_list[i + 1] if i < len(role_list) - 1 else None)

            # The name can't be determined until all of the roles are set. If
            # it changes, re-save. Unfortunately roles are cached so this actually
            # doesn't work yet. Re-run the parser to fix names.
            nn = (person.name, person.sortname)
            if hasattr(person, "role"):
                delattr(person, "role")  # clear the cached info
            person._most_recent_role = None  # clear cache here too
            person.set_names()
            if nn != (person.name, person.sortname):
                log.warn("%s is now %s." % (nn[0], person.name))
                person.save()

        except Exception as ex:
            # Catch unexpected exceptions and log them
            pprint.pprint(node)
            log.error('', exc_info=ex)
            had_error = True

        progress.tick()

    log.info('Processed persons: %d' % len(processed_persons))
    log.info('Created persons: %d' % len(created_persons))

    # Only look for removed people and mark the files as processed after a
    # completely clean run.
    if not had_error:
        # Report persons that are in the database but were not found in the YAML files
        removed_persons = existing_persons - processed_persons
        for pk in removed_persons:
            p = Person.objects.get(pk=pk)
            if p.roles.all().count() > 0:
                log.warn("Missing? Deleted? %d: %s" % (p.id, p))
            else:
                log.warn("Deleting... %d: %s (remember to prune_index!)" %
                         (p.id, p))
                # The raise makes p.delete() unreachable: the person is never
                # actually deleted (apparently a deliberate safeguard).
                raise Exception("Won't delete!")
                p.delete()
        log.info('Missing/deleted persons: %d' % len(removed_persons))

        # Mark the files as processed.
        for p in SRC_FILES:
            f = BASE_PATH + p + ".yaml"
            File.objects.save_file(f)

    update_twitter_list()
Exemple #9
0
def main(options):
    """
    Update Person and PersonRole models from the legislators YAML files.

    Do safe update: touch only those records
    which have been changed.

    ``options`` is read for two attributes: ``force`` (run even when no
    source file changed, and save every record whether or not it differs)
    and ``disable_events`` (skip creating role events).
    """

    BASE_PATH = CONGRESS_LEGISLATORS_PATH
    SRC_FILES = ['legislators-current', 'legislators-historical', 'legislators-social-media', 'executive'] # order matters

    # Bail out early unless at least one source file changed (or --force).
    for p in SRC_FILES:
        f = BASE_PATH + p + ".yaml"
        if not File.objects.is_changed(f) and not options.force:
            log.info('File %s was not changed' % f)
        else:
            # file modified...
            break
    else:
        # no 'break' ==> no files modified
        return

    # Start parsing.

    had_error = False

    # Get combined data.
    legislator_data = { }  # GovTrack id -> merged YAML record
    leg_id_map = { }  # (id scheme, id value) -> GovTrack id
    for p in SRC_FILES:
        log.info('Opening %s...' % p)
        f = BASE_PATH + p + ".yaml"
        y = yaml_load(f)
        for m in y:
            if p != 'legislators-social-media':
                govtrack_id = m["id"].get("govtrack")

                # For the benefit of the social media file, make a mapping of IDs.
                for k, v in m["id"].items():
                    if not isinstance(v, list):
                        leg_id_map[(k,v)] = govtrack_id
            else:
                # GovTrack IDs are not always listed in this file.
                govtrack_id = None
                for k, v in m["id"].items():
                    if not isinstance(v, list) and (k, v) in leg_id_map:
                        govtrack_id = leg_id_map[(k,v)]
                        break

            if not govtrack_id:
                print("No GovTrack ID:")
                pprint.pprint(m)
                had_error = True
                continue

            # Merge records that share a GovTrack id: social media data is
            # attached to the existing record and executive terms are appended
            # to it. Any other duplicate is an error.
            if govtrack_id not in legislator_data:
                legislator_data[govtrack_id] = m
            elif p == "legislators-social-media":
                legislator_data[govtrack_id]["social"] = m["social"]
            elif p == "executive":
                legislator_data[govtrack_id]["terms"].extend( m["terms"] )
            else:
                raise ValueError("Duplication in an unexpected way (%d, %s)." % (govtrack_id, p))

    person_processor = PersonProcessor()
    role_processor = PersonRoleProcessor()

    existing_persons = set(Person.objects.values_list('pk', flat=True))
    processed_persons = set()
    created_persons = set()

    progress = Progress(total=len(legislator_data))
    log.info('Processing persons')

    for node in legislator_data.values():
        # Wrap each iteration in try/except
        # so that if some node breaks the parsing process
        # then other nodes could be parsed
        try:
            person = person_processor.process(Person(), node)

            # Create cached name strings. This is done again later
            # after the roles are updated.
            person.set_names()

            # Now try to load the person with such ID from
            # database. If found it then just update it
            # else create new Person object
            try:
                ex_person = Person.objects.get(pk=person.pk)
                if person_processor.changed(ex_person, person) or options.force:
                    # If the person has PK of existing record,
                    # coming in via the YAML-specified GovTrack ID,
                    # then Django ORM will update existing record
                    if not options.force:
                        log.warn("Updated %s" % person)
                    person.save()

            except Person.DoesNotExist:
                created_persons.add(person.pk)
                person.save()
                log.warn("Created %s" % person)

            processed_persons.add(person.pk)

            # Process roles of the person. ``roles`` holds the database rows
            # not yet matched to a YAML term; ``existing_roles`` remembers the
            # ids of all rows so leftovers can be detected afterwards.
            roles = list(PersonRole.objects.filter(person=person))
            existing_roles = set(r.pk for r in roles)
            processed_roles = set()
            role_list = []  # roles saved in this run, in YAML order
            for termnode in node['terms']:
                role = role_processor.process(PersonRole(), termnode)
                role.person = person

                # Compare both ends of the term against the same date.
                today = datetime.now().date()
                role.current = role.startdate <= today and role.enddate >= today

                # Scan for most recent leadership role within the time period of this term,
                # which isn't great for Senators because it's likely it changed a few times
                # within a term, especially if there was a party switch.
                role.leadership_title = None
                for leadership_node in node.get("leadership_roles", []):
                    # must match on date and chamber
                    # (the YAML dates are compared as ISO-format strings)
                    if leadership_node["start"] >= role.enddate.isoformat(): continue # might start on the same day but is for the next Congress
                    if "end" in leadership_node and leadership_node["end"] <= role.startdate.isoformat(): continue # might start on the same day but is for the previous Congress
                    if leadership_node["chamber"] != RoleType.by_value(role.role_type).congress_chamber.lower(): continue
                    # No break: the last matching leadership node wins.
                    role.leadership_title = leadership_node["title"]

                # Try to match this role with one already in the database.
                # First search for an exact match on type/start/end.
                ex_role = None
                for r in roles:
                    if role.role_type == r.role_type and r.startdate == role.startdate and r.enddate == role.enddate:
                        ex_role = r
                        break

                # Otherwise match on type/start only.
                if not ex_role:
                    for r in roles:
                        if role.role_type == r.role_type and r.startdate == role.startdate:
                            ex_role = r
                            break

                if ex_role:
                    # These roles correspond. Reusing the existing id makes
                    # save() update that row.
                    processed_roles.add(ex_role.id)
                    role.id = ex_role.id
                    if role_processor.changed(ex_role, role) or options.force:
                        role.save()
                        role_list.append(role)
                        if not options.force:
                            log.warn("Updated %s" % role)
                    roles.remove(ex_role) # don't need to try matching this to any other node
                else:
                    # Didn't find a matching role. If an unmatched database
                    # row of the same type remains, we can't tell whether this
                    # term is new or a changed version of that row.
                    if any(r.role_type == role.role_type for r in roles):
                        print("%s is one of these?" % (role,))
                        for candidate in roles:
                            print("\t%s" % (candidate,))
                        raise Exception("There is an unmatched role.")
                    log.warn("Created %s" % role)
                    role.save()
                    role_list.append(role)

            # create the events for the roles after all have been loaded
            # because we don't create events for ends of terms and
            # starts of terms that are adjacent.
            if not options.disable_events:
                for i in range(len(role_list)):
                    # Pass the previous and next saved role (None at either end).
                    role_list[i].create_events(
                        role_list[i-1] if i > 0 else None,
                        role_list[i+1] if i < len(role_list)-1 else None
                        )

            removed_roles = existing_roles - processed_roles
            for pk in removed_roles:
                pr = PersonRole.objects.get(pk=pk)
                print("%s %s" % (pr.person.id, pr))
                # The raise makes the two lines after it unreachable, so a
                # leftover role is reported as an error and never deleted
                # (apparently a deliberate safeguard).
                raise ValueError("Deleted role??")
                log.warn("Deleted %s" % pr)
                pr.delete()

            # The name can't be determined until all of the roles are set. If
            # it changes, re-save. Unfortunately roles are cached so this actually
            # doesn't work yet. Re-run the parser to fix names.
            nn = (person.name, person.sortname)
            if hasattr(person, "role"): delattr(person, "role") # clear the cached info
            person.set_names()
            if nn != (person.name, person.sortname):
                log.warn("%s is now %s." % (nn[0], person.name))
                person.save()

        except Exception as ex:
            # Catch unexpected exceptions and log them
            pprint.pprint(node)
            log.error('', exc_info=ex)
            had_error = True

        progress.tick()
Exemple #10
0
def get_cohorts(person, role, congress, session, committee_membership):
    """Return the cohorts (peer groups) that a legislator belongs to.

    Each cohort is a dict with a "key" entry plus, for some cohorts, extra
    descriptive entries (chamber, party, state, position, ...).

    Parameters:
      person -- the legislator; person.id is used to look up committee
        roles and the object itself is used to query PersonRole records.
      role -- the role being analyzed; role_type, party, state, district,
        leadership_title and enddate are read from it.
      congress -- Congress number; only congresses numbered lower than this
        count as previously served.
      session -- interpolated into the competitive seats file name.
      committee_membership -- mapping of person id to a mapping of
        committee code to committee role.

    Side effects: the first call made for a representative fills the
    module-level competitive_seats cache from
    analysis/cook_competitive_seats-<session>.txt. The cache is not keyed
    by session, so whichever session is requested first is used for all
    later calls.
    """
    global competitive_seats

    cohorts = []

    # chamber
    chamber = RoleType.by_value(role.role_type).congress_chamber.lower()
    cohorts.append({"key": chamber})

    # party
    cohorts.append({
        "key": "party-%s-%s" % (chamber, role.party.lower()),
        "chamber": chamber,
        "party": role.party
    })

    # state delegation
    if role.role_type == RoleType.representative:
        cohorts.append({
            "key": "house-state-delegation-" + role.state.lower(),
            "state": role.state
        })

    # chamber leadership position
    if role.leadership_title:
        cohorts.append({
            "key": chamber + "-leadership",
            "position": role.leadership_title
        })

    # freshmen/sophomores
    min_start_date = None
    prev_congresses_served = set()
    # use enddate__lte=role.enddate to include the current role itself since
    # for senators their current role may span previous congresses
    for r in PersonRole.objects.filter(person=person,
                                       role_type=role.role_type,
                                       enddate__lte=role.enddate):
        if not min_start_date or r.startdate < min_start_date:
            min_start_date = r.startdate
        for c in r.congress_numbers():
            if c < congress:
                prev_congresses_served.add(c)
    if len(prev_congresses_served) == 0:
        cohorts.append({"key": chamber + "-freshmen", "chamber": chamber})
    if len(prev_congresses_served) == 1:
        cohorts.append({"key": chamber + "-sophomores", "chamber": chamber})
    # more than ten years between the earliest start date and the end of
    # this role
    if min_start_date and (role.enddate - min_start_date).days > 365.25 * 10:
        cohorts.append({
            "key": chamber + "-tenyears",
            "chamber": chamber,
            "first_date": min_start_date.isoformat()
        })

    # committee leadership positions
    committee_positions = []
    for committee, committee_role in committee_membership.get(person.id,
                                                              {}).items():
        if len(committee) != 4: continue  # exclude subcommittees
        if committee_role in (CommitteeMemberRole.ranking_member,
                              CommitteeMemberRole.vice_chairman,
                              CommitteeMemberRole.chairman):
            committee_positions.append((committee, committee_role))
    if len(committee_positions) > 0:
        cohorts.append({
            "key": chamber + "-committee-leaders",
            "chamber": chamber,
            "positions": committee_positions
        })

    # safe vs competitive seats
    if role.role_type == RoleType.representative:
        if competitive_seats is None:
            # Parse into a local set and publish it only once the whole file
            # has been read, so a parse error cannot leave a partially
            # filled cache behind for later calls.
            seats = set()
            with open("analysis/cook_competitive_seats-%s.txt" %
                      session) as seats_file:
                for line in seats_file:
                    # skip blank lines and comment lines
                    if not line.strip() or line.startswith("#"): continue
                    (state, district) = line.split(" ")[0].split("-")
                    seats.add((state, int(district)))
            competitive_seats = seats
        if (role.state, role.district) in competitive_seats:
            cohorts.append({"key": chamber + "-competitive-seat"})
        else:
            cohorts.append({"key": chamber + "-safe-seat"})

    return cohorts
def get_cohorts(person, role, congress, session, committee_membership):
	"""Return the cohorts (peer groups) that a legislator belongs to.

	Each cohort is a dict with a "key" entry plus, for some cohorts, extra
	descriptive entries (chamber, party, state, position, ...).

	Parameters:
	  person -- the legislator; person.id is used to look up committee roles
	    and special cases, and the object is used to query PersonRole records.
	  role -- the role being analyzed; role_type, party, state, district,
	    leadership_title and enddate are read from it.
	  congress -- Congress number; only congresses numbered lower than this
	    count as previously served.
	  session -- compared against '2014' for the special case below and
	    interpolated into the competitive seats file name.
	  committee_membership -- mapping of person id to a mapping of committee
	    code to committee role.

	Side effects: the first call made for a representative fills the
	module-level competitive_seats cache from
	analysis/cook_competitive_seats-<session>.txt. The cache is not keyed by
	session, so whichever session is requested first is used for all later
	calls.
	"""
	global competitive_seats

	cohorts = []

	# chamber
	chamber = RoleType.by_value(role.role_type).congress_chamber.lower()
	cohorts.append({ "key": chamber })

	# party
	cohorts.append({ "key": "party-%s-%s" % (chamber, role.party.lower()), "chamber": chamber, "party": role.party })

	# state delegation
	if role.role_type == RoleType.representative:
		cohorts.append({ "key": "house-state-delegation-" + role.state.lower(), "state": role.state })

	# chamber leadership position
	if role.leadership_title:
		cohorts.append({ "key": chamber + "-leadership", "position": role.leadership_title })

	# freshmen/sophomores
	min_start_date = None
	prev_congresses_served = set()
	# use enddate__lte=role.enddate to include the current role itself since
	# for senators their current role may span previous congresses
	for r in PersonRole.objects.filter(person=person, role_type=role.role_type, enddate__lte=role.enddate):
		if not min_start_date or r.startdate < min_start_date: min_start_date = r.startdate
		for c in r.congress_numbers():
			if c < congress:
				prev_congresses_served.add(c)
	if person.id in (412505, 412503, 412506, 412507) and session == '2014':
		# Schatz served only a few days in the 112th Congress. Other members took office
		# within the last two months of the 112th Congress. For the 2013 stats, I originally
		# classified them as sophomores. I revised it to drop that cohort from Schatz after
		# hearing from his office. For 2014 stats, I am classifying them all as a freshman
		# rather than sophomores.
		assert prev_congresses_served == { 112 }
		prev_congresses_served = set()
	if len(prev_congresses_served) == 0: cohorts.append({ "key": chamber + "-freshmen", "chamber": chamber })
	if len(prev_congresses_served) == 1: cohorts.append({ "key": chamber + "-sophomores", "chamber": chamber })
	# more than ten years between the earliest start date and the end of this role
	if min_start_date and (role.enddate - min_start_date).days > 365.25*10: cohorts.append({ "key": chamber + "-tenyears", "chamber": chamber, "first_date": min_start_date.isoformat()  })

	# committee leadership positions
	committee_positions = []
	for committee, committee_role in committee_membership.get(person.id, {}).items():
		if len(committee) != 4: continue # exclude subcommittees
		if committee_role in (CommitteeMemberRole.ranking_member, CommitteeMemberRole.vice_chairman, CommitteeMemberRole.chairman):
			committee_positions.append( (committee, committee_role) )
	if len(committee_positions) > 0:
		cohorts.append({ "key": chamber + "-committee-leaders", "chamber": chamber, "positions": committee_positions })

	# safe vs competitive seats
	if role.role_type == RoleType.representative:
		if competitive_seats is None:
			# Parse into a local set and publish it only once the whole file
			# has been read, so a parse error cannot leave a partially filled
			# cache behind for later calls.
			seats = set()
			with open("analysis/cook_competitive_seats-%s.txt" % session) as seats_file:
				for line in seats_file:
					# skip blank lines and comment lines
					if not line.strip() or line.startswith("#"): continue
					(state, district) = line.split(" ")[0].split("-")
					seats.add( (state, int(district)) )
			competitive_seats = seats
		if (role.state, role.district) in competitive_seats:
			cohorts.append({ "key": chamber + "-competitive-seat" })
		else:
			cohorts.append({ "key": chamber + "-safe-seat" })

	return cohorts
def get_sponsor_stats(person, role, stats, congress, startdate, enddate, committee_membership):
	"""Record statistics about the bills `person` sponsored in `stats`.

	Only bills of the given congress introduced between startdate and
	enddate (inclusive) are considered. Entries are written to `stats`
	under the keys "bills-introduced", "bills-enacted", "bills-reported",
	"bills-with-committee-leaders", "bills-with-companion" and, only when
	more than ten bills were introduced,
	"bills-with-cosponsors-both-parties" (a percentage).

	committee_membership maps a person id to a mapping of committee code to
	committee role. Nothing is returned.
	"""
	# Committee roles that count as committee leadership.
	leader_roles = (CommitteeMemberRole.ranking_member, CommitteeMemberRole.vice_chairman, CommitteeMemberRole.chairman)

	# Bills the Member introduced during the time window.
	sponsored = Bill.objects.filter(
		sponsor=person,
		congress=congress,
		introduced_date__gte=startdate,
		introduced_date__lte=enddate)
	stats["bills-introduced"] = { "value": sponsored.count() }

	# Bills enacted, judged only by activity inside the time window.
	enacted = [
		b for b in sponsored
		if b.enacted_ex(restrict_to_activity_in_date_range=(startdate.isoformat(), enddate.isoformat()))
	]
	stats["bills-enacted"] = {
		"value": len(enacted),
		"bills": make_bill_entries(enacted),
	}

	# The remaining factors may only use activity that occurred during the
	# time window, so that re-running the script later on the same window
	# gives the same result -- i.e. future activity on bills should not
	# affect 1st Session statistics.
	sponsored = list(sponsored)
	reported = []
	with_cmte_leaders = []
	bipartisan_count = 0
	with_companion = []
	for bill in sponsored:
		# Was the bill reported during this time period?
		# NOTE(review): eval() runs the stored date string as Python code;
		# this is only safe as long as major_actions holds trusted data.
		for datestr, status, _text, _srcxml in bill.major_actions:
			when = eval(datestr)
			if isinstance(when, datetime.datetime):
				when = when.date()
			if when >= startdate and when <= enddate and status == BillStatus.reported:
				reported.append(bill)
				break # count each bill once even if the data lists several reports

		# Cosponsors who joined within the time window.
		cosponsors = list(
			Cosponsor.objects
			.filter(bill=bill, joined__gte=startdate, joined__lte=enddate)
			.select_related("person", "role"))

		# Does any cosponsor hold a leadership role on one of the bill's committees?
		committees = list(bill.committees.all())
		if any(
			committee_membership.get(cosponsor.person.id, {}).get(committee.code) in leader_roles
			for committee in committees
			for cosponsor in cosponsors):
			with_cmte_leaders.append(bill)

		# Is there a cosponsor from both parties?
		parties = set(cosponsor.role.party for cosponsor in cosponsors)
		if "Democrat" in parties and "Republican" in parties:
			bipartisan_count += 1

		# Was an identical (companion) bill introduced during the time period?
		companions = RelatedBill.objects.filter(
			bill=bill,
			relation="identical",
			related_bill__introduced_date__gte=startdate,
			related_bill__introduced_date__lte=enddate)
		if companions.exists():
			with_companion.append(bill)

	stats["bills-reported"] = {
		"value": len(reported),
		"bills": make_bill_entries(reported),
	}

	stats["bills-with-committee-leaders"] = {
		"value": len(with_cmte_leaders),
		"bills": make_bill_entries(with_cmte_leaders),
	}

	# The percentage is only reported when there are more than ten bills.
	total = len(sponsored)
	if total > 10:
		stats["bills-with-cosponsors-both-parties"] = {
			"value": 100.0*float(bipartisan_count)/float(total),
			"num_bills": total,
		}

	stats["bills-with-companion"] = {
		"value": len(with_companion),
		"other_chamber": RoleType.by_value(role.role_type).congress_chamber_other,
		"bills": make_bill_entries(with_companion),
	}
Exemple #13
0
def main(options):
    """
    Update Person and PersonRole models.
    
    Do safe update: touch only those records
    which have been changed.

    The legislator YAML files under CONGRESS_LEGISLATORS_PATH are merged into
    one record per GovTrack ID, then each person and that person's roles are
    saved, matching roles to existing database rows by value because the
    source data has no primary key for terms.

    Attributes read from `options`:
      force -- process even if no source file changed, and save records even
        when no change was detected.
      disable_events -- skip creating role events.
    """

    BASE_PATH = CONGRESS_LEGISLATORS_PATH
    SRC_FILES = ['legislators-current', 'legislators-historical', 'legislators-social-media', 'executive'] # order matters

    # Stop right away unless at least one source file changed (or --force).
    for p in SRC_FILES:
        f = BASE_PATH + p + ".yaml"
        if not File.objects.is_changed(f) and not options.force:
            log.info('File %s was not changed' % f)
        else:
            # file modified...
            break
    else:
        # no 'break' ==> no files modified
        return

    # Start parsing.
    
    # Set when a record could not be processed. NOTE(review): nothing within
    # this function reads the flag afterwards.
    had_error = False

    # Get combined data.
    legislator_data = { } # GovTrack ID => merged YAML record
    leg_id_map = { } # (id scheme, id value) => GovTrack ID
    for p in SRC_FILES:
        log.info('Opening %s...' % p)
        f = BASE_PATH + p + ".yaml"
        y = yaml_load(f)
        for m in y:
            if p == "legislators-current":
                # We know all terms but the last are non-current and the last is.
                for r in m["terms"]: r["current"] = False
                m["terms"][-1]["current"] = True
            elif p == "legislators-historical":
                # We know all terms are non-current.
                for r in m["terms"]: r["current"] = False

            if p != 'legislators-social-media':
                govtrack_id = m["id"].get("govtrack")
                
                # For the benefit of the social media file, make a mapping of IDs.
                for k, v in m["id"].items():
                    if type(v) != list:
                        leg_id_map[(k,v)] = govtrack_id
            else:
                # GovTrack IDs are not always listed in this file.
                govtrack_id = None
                for k, v in m["id"].items():
                    if type(v) != list and (k, v) in leg_id_map:
                        govtrack_id = leg_id_map[(k,v)]
                        break
            
            if not govtrack_id:
                print "No GovTrack ID:"
                pprint.pprint(m)
                had_error = True
                continue
                
            # Merge records that refer to the same person: the first file that
            # mentions an ID supplies the base record, the social media file
            # adds its "social" entry, and the executive file adds more terms.
            if govtrack_id not in legislator_data:
                legislator_data[govtrack_id] = m
            elif p == "legislators-social-media":
                legislator_data[govtrack_id]["social"] = m["social"]
            elif p == "executive":
                legislator_data[govtrack_id]["terms"].extend( m["terms"] )
            else:
                raise ValueError("Duplication in an unexpected way (%d, %s)." % (govtrack_id, p))
    
    person_processor = PersonProcessor()
    role_processor = PersonRoleProcessor()

    # NOTE(review): existing_persons, processed_persons and created_persons
    # are filled in but not read anywhere within this function.
    existing_persons = set(Person.objects.values_list('pk', flat=True))
    processed_persons = set()
    created_persons = set()

    progress = Progress(total=len(legislator_data))
    log.info('Processing persons')

    for node in legislator_data.values():
        # Wrap each iteration in try/except
        # so that if some node breaks the parsing process
        # then other nodes could be parsed
        try:
            person = person_processor.process(Person(), node)
            
            # Create cached name strings. This is done again later
            # after the roles are updated.
            person.set_names()

            # Now try to load the person with such ID from
            # database. If found it then just update it
            # else create new Person object
            try:
                ex_person = Person.objects.get(pk=person.pk)
                if person_processor.changed(ex_person, person) or options.force:
                    # If the person has PK of existing record,
                    # coming in via the YAML-specified GovTrack ID,
                    # then Django ORM will update existing record
                    if not options.force:
                        log.warn("Updated %s" % person)
                    person.save()
                    
            except Person.DoesNotExist:
                created_persons.add(person.pk)
                person.save()
                log.warn("Created %s" % person)

            processed_persons.add(person.pk)

            # Parse all of the roles.
            new_roles = []
            for termnode in node['terms']:
                role = role_processor.process(PersonRole(), termnode)
                role.person = person
                role.extra = filter_yaml_term_structure(termnode) # copy in the whole YAML structure

                # Is this role current? For legislators, same as whether it came from legislators-current, which eases Jan 3 transitions when we can't distinguish by date.
                if "current" in termnode:
                    role.current = termnode["current"]

                # But executives...
                else:
                    now = datetime.now().date()
                    role.current = role.startdate <= now and role.enddate >= now
                    # Because of date overlaps at noon transition dates, ensure that only the last term that covers
                    # today is current --- reset past roles to not current. Doesn't handle turning off retiring people tho.
                    # NOTE(review): this reset runs for every term that lacks a "current" flag, whether or
                    # not that term itself covers today -- confirm that is intended.
                    for r in new_roles: r.current = False

                # Scan for most recent leadership role within the time period of this term,
                # which isn't great for Senators because it's likely it changed a few times
                # within a term, especially if there was a party switch.
                role.leadership_title = None
                for leadership_node in node.get("leadership_roles", []):
                    # must match on date and chamber
                    # (the dates are compared as ISO-formatted strings)
                    if leadership_node["start"] >= role.enddate.isoformat(): continue # might start on the same day but is for the next Congress
                    if "end" in leadership_node and leadership_node["end"] <= role.startdate.isoformat(): continue # might start on the same day but is for the previous Congress
                    if leadership_node["chamber"] != RoleType.by_value(role.role_type).congress_chamber.lower(): continue
                    role.leadership_title = leadership_node["title"]

                new_roles.append(role)

            # Try matching the new roles to existing db records. Since we don't have a primary key
            # in the source data, we have to match on the record values. But because of errors in data,
            # term start/end dates can change, so matching has to be a little fuzzy.
            existing_roles = list(PersonRole.objects.filter(person=person))
            matches = []
            def run_match_rule(rule):
                # Pair up still-unmatched new and existing roles that have the same
                # role type and state and satisfy rule(new_role, existing_role).
                # Matched roles are moved out of new_roles/existing_roles into matches.
                import itertools
                for new_role, existing_role in itertools.product(new_roles, existing_roles):
                    if new_role not in new_roles or existing_role not in existing_roles: continue # already matched on a previous iteration
                    if new_role.role_type != existing_role.role_type: continue
                    if new_role.state != existing_role.state: continue
                    if rule(new_role, existing_role):
                        matches.append((new_role, existing_role))
                        new_roles.remove(new_role)
                        existing_roles.remove(existing_role)

            # First match exactly, then exact on just one date, then on contractions and expansions.
            run_match_rule(lambda new_role, existing_role : new_role.startdate == existing_role.startdate and new_role.enddate == existing_role.enddate)
            run_match_rule(lambda new_role, existing_role : new_role.startdate == existing_role.startdate or new_role.enddate == existing_role.enddate)
            run_match_rule(lambda new_role, existing_role : new_role.startdate >= existing_role.startdate and new_role.enddate <= existing_role.enddate)
            run_match_rule(lambda new_role, existing_role : new_role.startdate <= existing_role.startdate and new_role.enddate >= existing_role.enddate)

            # Update the database entries that correspond with records in the data file.
            did_update_any = False
            for new_role, existing_role in matches:
                # Taking over the existing id makes save() update that row.
                new_role.id = existing_role.id
                if role_processor.changed(existing_role, new_role) or options.force:
                    new_role.save()
                    did_update_any = True
                    if not options.force:
                        log.warn("Updated %s" % new_role)

            # If we have multiple records on disk that didn't match and multiple records in the database
            # that didn't match, then we don't know how to align them.
            if len(new_roles) > 0 and len(existing_roles) > 0:
                print(new_roles)
                print(existing_roles)
                raise Exception("There is an unmatched role.")

            # Otherwise if there are any unmatched new roles, we can just add them.
            for role in new_roles:
                log.warn("Created %s" % role)
                role.save()
                did_update_any = True
            
            # And likewise for any existing roles that are left over.
            # NOTE(review): the raise below makes the log.warn and delete() calls
            # after it unreachable, so a leftover database role currently aborts
            # processing of this person instead of being deleted.
            for pr in existing_roles:
                print pr.person.id, pr
                raise ValueError("Deleted role??")
                log.warn("Deleted %s" % pr)
                pr.delete()
            
            if did_update_any and not options.disable_events:
                # Create the events for the roles after all have been loaded
                # because we don't create events for ends of terms and
                # starts of terms that are adjacent. Refresh the list to get
                # the roles in order.
                role_list = list(PersonRole.objects.filter(person=person).order_by('startdate'))
                for i in xrange(len(role_list)):
                    # each role is given its previous and next neighbor (or None at the ends)
                    role_list[i].create_events(
                        role_list[i-1] if i > 0 else None,
                        role_list[i+1] if i < len(role_list)-1 else None
                        )
            
            # The name can't be determined until all of the roles are set. If
            # it changes, re-save. Unfortunately roles are cached so this actually
            # doesn't work yet. Re-run the parser to fix names.
            nn = (person.name, person.sortname)
            if hasattr(person, "role"): delattr(person, "role") # clear the cached info
            person.set_names()
            if nn != (person.name, person.sortname):
                log.warn("%s is now %s." % (nn[0], person.name))
                person.save()
            
        except Exception, ex:
            # Catch unexpected exceptions and log them
            pprint.pprint(node)
            log.error('', exc_info=ex)
            had_error = True

        progress.tick()
Exemple #14
0
def get_sponsor_stats(person, role, stats, congress, startdate, enddate, committee_membership):
	"""Record statistics about the bills `person` sponsored in `stats`.

	Only bills of the given congress introduced between startdate and
	enddate (inclusive) are considered. Entries are written to `stats`
	under the keys "bills-introduced", "bills-enacted-ti", "bills-reported",
	"bills-with-committee-leaders",
	"bills-with-cosponsors-both-parties-count" and "bills-with-companion".

	committee_membership maps a person id to a mapping of committee code to
	committee role. Nothing is returned.

	Raises ValueError when the window spans 1.5 years or less and at least
	one bill was introduced in it (see was_bill_enacted below).
	"""
	# Committee roles that count as committee leadership.
	leader_roles = (CommitteeMemberRole.ranking_member, CommitteeMemberRole.vice_chairman, CommitteeMemberRole.chairman)

	# Bills the Member introduced during the time window.
	sponsored = Bill.objects.filter(
		sponsor=person,
		congress=congress,
		introduced_date__gte=startdate,
		introduced_date__lte=enddate)
	stats["bills-introduced"] = { "value": sponsored.count() }

	# How many bills were "enacted" within this time window? Follow the
	# was_enacted_ex logic, which includes text incorporation. The date range
	# is meant for single-year stats only, because a bill might be enacted
	# after the end of a Congress.
	def was_bill_enacted(b, startdate, enddate):
		if (enddate-startdate).days <= 365*1.5:
			# NOTE(review): short windows are rejected outright; the
			# date-restricted call after the raise can never run. Confirm
			# whether the raise is a leftover guard before relying on it.
			raise ValueError()
			return b.was_enacted_ex(restrict_to_activity_in_date_range=(startdate.isoformat(), enddate.isoformat()))
		return b.was_enacted_ex()

	enacted = [b for b in sponsored if was_bill_enacted(b, startdate, enddate)]
	stats["bills-enacted-ti"] = {
		"value": len(enacted),
		"bills": make_bill_entries(enacted),
	}

	# The remaining factors may only use activity that occurred during the
	# time window, so that re-running the script later on the same window
	# gives the same result -- i.e. future activity on bills should not
	# affect 1st Session statistics. Mostly.
	sponsored = list(sponsored)
	reported = []
	with_cmte_leaders = []
	bipartisan = []
	with_companion = []
	for bill in sponsored:
		# Was the bill reported during this time period?
		# NOTE(review): eval() runs the stored date string as Python code;
		# this is only safe as long as major_actions holds trusted data.
		for datestr, status, _text, _srcxml in bill.major_actions:
			when = eval(datestr)
			if isinstance(when, datetime.datetime):
				when = when.date()
			if when >= startdate and when <= enddate and status == BillStatus.reported:
				reported.append(bill)
				break # count each bill once even if the data lists several reports

		# Cosponsors who joined within the time window.
		cosponsors = list(
			Cosponsor.objects
			.filter(bill=bill, joined__gte=startdate, joined__lte=enddate)
			.select_related("person", "role"))

		# Does any cosponsor hold a leadership role on one of the bill's committees?
		# Warning: Committee membership data is volatile, so re-running the stats may come out different.
		committees = list(bill.committees.all())
		if any(
			committee_membership.get(cosponsor.person.id, {}).get(committee.code) in leader_roles
			for committee in committees
			for cosponsor in cosponsors):
			with_cmte_leaders.append(bill)

		# Is there a cosponsor from both parties?
		# Warning: If someone switches parties, this will change.
		parties = set(cosponsor.role.party for cosponsor in cosponsors)
		if "Democrat" in parties and "Republican" in parties:
			bipartisan.append(bill)

		# Was an identical (companion) bill introduced during the time period?
		companions = RelatedBill.objects.filter(
			bill=bill,
			relation="identical",
			related_bill__introduced_date__gte=startdate,
			related_bill__introduced_date__lte=enddate)
		if companions.exists():
			with_companion.append(bill)

	stats["bills-reported"] = {
		"value": len(reported),
		"bills": make_bill_entries(reported),
	}

	stats["bills-with-committee-leaders"] = {
		"value": len(with_cmte_leaders),
		"bills": make_bill_entries(with_cmte_leaders),
	}

	stats["bills-with-cosponsors-both-parties-count"] = {
		"value": len(bipartisan),
		"bills": make_bill_entries(bipartisan),
		"num_bills": len(sponsored),
	}

	stats["bills-with-companion"] = {
		"value": len(with_companion),
		"other_chamber": RoleType.by_value(role.role_type).congress_chamber_other,
		"bills": make_bill_entries(with_companion),
	}
Exemple #15
0
def get_cohorts(person, role, congress, session, committee_membership):
    """Return the cohorts (peer groups) that a legislator belongs to.

    Each cohort is a dict with a "key" entry plus, for some cohorts, extra
    descriptive entries (chamber, party, state, position, ...).

    Parameters:
      person -- the legislator; person.id is used to look up committee
        roles and special cases, and the object is used to query PersonRole
        records.
      role -- the role being analyzed; role_type, party, state, district,
        leadership_title and enddate are read from it.
      congress -- Congress number; only congresses numbered lower than this
        count as previously served.
      session -- compared against year strings such as "2014" for the
        special cases below and interpolated into the competitive seats
        file name.
      committee_membership -- mapping of person id to a mapping of
        committee code to committee role.

    Side effects: the first call made for a representative fills the
    module-level competitive_seats cache from
    analysis/cook_competitive_seats-<session>.txt. The cache is not keyed
    by session, so whichever session is requested first is used for all
    later calls.
    """
    global competitive_seats

    cohorts = []

    # chamber
    chamber = RoleType.by_value(role.role_type).congress_chamber.lower()
    cohorts.append({"key": chamber})

    # party
    cohorts.append({
        "key": "party-%s-%s" % (chamber, role.party.lower()),
        "chamber": chamber,
        "party": role.party
    })

    # state delegation
    if role.role_type == RoleType.representative:
        cohorts.append({
            "key": "house-state-delegation-" + role.state.lower(),
            "state": role.state
        })

    # chamber leadership position
    if role.leadership_title:
        cohorts.append({
            "key": chamber + "-leadership",
            "position": role.leadership_title
        })

    # freshmen/sophomores
    min_start_date = None
    prev_congresses_served = set()
    # use enddate__lte=role.enddate to include the current role itself since
    # for senators their current role may span previous congresses
    for r in PersonRole.objects.filter(person=person,
                                       role_type=role.role_type,
                                       enddate__lte=role.enddate):
        if not min_start_date or r.startdate < min_start_date:
            min_start_date = r.startdate
        for c in r.congress_numbers():
            if c < congress:
                prev_congresses_served.add(c)
    if person.id in (412505, 412503, 412506, 412507):
        # Schatz served only a few days in the 112th Congress. Other members took office
        # within the last two months of the 112th Congress. For the 2013 stats, I originally
        # classified them as sophomores. I revised it to drop that cohort from Schatz after
        # hearing from his office. For 2014 stats, I am classifying them all as a freshman
        # rather than sophomores; for 2015/2016 I am classifying them as sophomores.
        if session == "2014":
            assert prev_congresses_served == {112}
            prev_congresses_served = set()
        elif session in ("2015", "2016"):
            assert prev_congresses_served == {112, 113}
            prev_congresses_served = {113}
    if person.id in (412605, 412606, 412607):
        # Similarly, Dave Brat (412605), Donald Norcross (412606), and Alma Adams (412607)
        # took office on 2014-11-12, but we'll treat them as freshman in the 114th Congress
        # rather than sophomores.
        if session in ("2015", "2016"):
            assert prev_congresses_served == {113}
            prev_congresses_served = set()
        elif session in ("2017", "2018"):
            assert prev_congresses_served == {113, 114}
            prev_congresses_served = {114}
    if len(prev_congresses_served) == 0:
        cohorts.append({"key": chamber + "-freshmen", "chamber": chamber})
    if len(prev_congresses_served) == 1:
        cohorts.append({"key": chamber + "-sophomores", "chamber": chamber})
    # more than ten years between the earliest start date and the end of
    # this role
    if min_start_date and (role.enddate - min_start_date).days > 365.25 * 10:
        cohorts.append({
            "key": chamber + "-tenyears",
            "chamber": chamber,
            "first_date": min_start_date.isoformat()
        })

    # committee leadership positions
    committee_positions = []
    for committee, committee_role in committee_membership.get(person.id,
                                                              {}).items():
        if len(committee) != 4: continue  # exclude subcommittees
        if committee_role in (CommitteeMemberRole.ranking_member,
                              CommitteeMemberRole.vice_chairman,
                              CommitteeMemberRole.chairman):
            committee_positions.append((committee, committee_role))
    if len(committee_positions) > 0:
        cohorts.append({
            "key": chamber + "-committee-leaders",
            "chamber": chamber,
            "positions": committee_positions
        })

    # safe vs competitive seats
    if role.role_type == RoleType.representative:
        if competitive_seats is None:
            # Parse into a local set and publish it only once the whole file
            # has been read, so a parse error cannot leave a partially
            # filled cache behind for later calls.
            seats = set()
            with open("analysis/cook_competitive_seats-%s.txt" %
                      session) as seats_file:
                for line in seats_file:
                    # skip blank lines and comment lines
                    if not line.strip() or line.startswith("#"): continue
                    (state, district) = line.split(" ")[0].split("-")
                    seats.add((state, int(district)))
            competitive_seats = seats
        if (role.state, role.district) in competitive_seats:
            cohorts.append({"key": chamber + "-competitive-seat"})
        else:
            cohorts.append({"key": chamber + "-safe-seat"})

    return cohorts
Exemple #16
0
def get_sponsor_stats(person, role, stats, congress, startdate, enddate,
                      committee_membership):
    """Record statistics about the bills `person` sponsored in `stats`.

    Only bills of the given congress introduced between startdate and
    enddate (inclusive) are considered. Entries are written to `stats`
    under the keys "bills-introduced", "bills-enacted", "bills-reported",
    "bills-with-committee-leaders", "bills-with-companion" and, only when
    more than ten bills were introduced,
    "bills-with-cosponsors-both-parties" (a percentage).

    committee_membership maps a person id to a mapping of committee code to
    committee role. Nothing is returned.
    """
    # Committee roles that count as committee leadership.
    leader_roles = (CommitteeMemberRole.ranking_member,
                    CommitteeMemberRole.vice_chairman,
                    CommitteeMemberRole.chairman)

    # Bills the Member introduced during the time window.
    sponsored = Bill.objects.filter(sponsor=person,
                                    congress=congress,
                                    introduced_date__gte=startdate,
                                    introduced_date__lte=enddate)
    stats["bills-introduced"] = {"value": sponsored.count()}

    # Bills enacted, judged only by activity inside the time window.
    enacted = [
        b for b in sponsored
        if b.was_enacted_ex(restrict_to_activity_in_date_range=(
            startdate.isoformat(), enddate.isoformat()))
    ]
    stats["bills-enacted"] = {
        "value": len(enacted),
        "bills": make_bill_entries(enacted),
    }

    # The remaining factors may only use activity that occurred during the
    # time window, so that re-running the script later on the same window
    # gives the same result -- i.e. future activity on bills should not
    # affect 1st Session statistics.
    sponsored = list(sponsored)
    reported = []
    with_cmte_leaders = []
    bipartisan_count = 0
    with_companion = []
    for bill in sponsored:
        # Was the bill reported during this time period?
        # NOTE(review): eval() runs the stored date string as Python code;
        # this is only safe as long as major_actions holds trusted data.
        for datestr, status, _text, _srcxml in bill.major_actions:
            when = eval(datestr)
            if isinstance(when, datetime.datetime):
                when = when.date()
            if when >= startdate and when <= enddate and status == BillStatus.reported:
                reported.append(bill)
                break  # count each bill once even if the data lists several reports

        # Cosponsors who joined within the time window.
        cosponsor_query = Cosponsor.objects.filter(bill=bill,
                                                   joined__gte=startdate,
                                                   joined__lte=enddate)
        cosponsors = list(cosponsor_query.select_related("person", "role"))

        # Does any cosponsor hold a leadership role on one of the bill's
        # committees?
        committees = list(bill.committees.all())
        if any(
                committee_membership.get(cosponsor.person.id, {}).get(
                    committee.code) in leader_roles
                for committee in committees
                for cosponsor in cosponsors):
            with_cmte_leaders.append(bill)

        # Is there a cosponsor from both parties?
        parties = set(cosponsor.role.party for cosponsor in cosponsors)
        if "Democrat" in parties and "Republican" in parties:
            bipartisan_count += 1

        # Was an identical (companion) bill introduced during the time period?
        companions = RelatedBill.objects.filter(
            bill=bill,
            relation="identical",
            related_bill__introduced_date__gte=startdate,
            related_bill__introduced_date__lte=enddate)
        if companions.exists():
            with_companion.append(bill)

    stats["bills-reported"] = {
        "value": len(reported),
        "bills": make_bill_entries(reported),
    }

    stats["bills-with-committee-leaders"] = {
        "value": len(with_cmte_leaders),
        "bills": make_bill_entries(with_cmte_leaders),
    }

    # The percentage is only reported when there are more than ten bills.
    total = len(sponsored)
    if total > 10:
        stats["bills-with-cosponsors-both-parties"] = {
            "value": 100.0 * float(bipartisan_count) / float(total),
            "num_bills": total,
        }

    stats["bills-with-companion"] = {
        "value": len(with_companion),
        "other_chamber":
        RoleType.by_value(role.role_type).congress_chamber_other,
        "bills": make_bill_entries(with_companion),
    }
Exemple #17
0
def get_sponsor_stats(person, role, stats, congress, startdate, enddate, committee_membership):
	"""Compute sponsorship statistics for a Member and store them in ``stats``.

	Looks at the bills ``person`` sponsored in ``congress`` that were introduced
	between ``startdate`` and ``enddate`` (inclusive) and adds these entries to
	``stats``: "bills-introduced", "bills-enacted-ti", "bills-reported",
	"bills-with-committee-leaders", "bills-with-cosponsors-both-parties-count"
	and "bills-with-companion".

	Args:
		person: The sponsor whose bills are counted.
		role: The Member's role; only ``role.role_type`` is read, to name the
			other chamber in the "bills-with-companion" entry.
		stats: Dict that is updated in place with the entries listed above.
		congress: Value matched against ``Bill.congress``.
		startdate: First day of the window. It is compared against dates, so
			presumably a ``datetime.date`` -- confirm with the caller.
		enddate: Last day of the window, same type as ``startdate``.
		committee_membership: Mapping of person id to a mapping of committee
			code to that person's CommitteeMemberRole on that committee.

	Raises:
		ValueError: If the window is not longer than 365*1.5 days and at
			least one sponsored bill was found (see ``was_bill_enacted``).
	"""
	# How many bills did the Member introduce during this time window?
	bills = Bill.objects.filter(sponsor=person, congress=congress,
		introduced_date__gte=startdate, introduced_date__lte=enddate)
	stats["bills-introduced"] = {
		"value": bills.count(),
	}

	# How many bills were "enacted" within this time window? Follow the was_enacted_ex logic
	# which includes text incorporation. Only use the date range on single-year stats because
	# a bill might be enacted after the end of a Congress.
	def was_bill_enacted(b, startdate, enddate):
		if (enddate - startdate).days > 365*1.5:
			return b.was_enacted_ex()
		# NOTE(review): for shorter (single-session) windows this path raises instead of
		# calling b.was_enacted_ex(restrict_to_activity_in_date_range=(startdate.isoformat(),
		# enddate.isoformat())). That call used to sit, unreachable, behind the raise.
		# Confirm whether the date-restricted check should be enabled.
		raise ValueError(
			"enactment statistics are only supported for windows longer than 365*1.5 days "
			"(got %s to %s)" % (startdate, enddate))
	bills_enacted = [b for b in bills if was_bill_enacted(b, startdate, enddate)]
	stats["bills-enacted-ti"] = {
		"value": len(bills_enacted),
		"bills": make_bill_entries(bills_enacted),
	}

	# In order to test these remaining factors, we have to look more closely
	# at the bill because we can only use activities that occurred during the
	# time window so that if we re-run this script on the same window at a
	# later date nothing changes -- i.e. future activity on bills should
	# not affect 1st Session statistics. Mostly.
	bills = list(bills)
	was_reported = []
	has_cmte_leaders = []
	has_cosponsors_both_parties = []
	has_companion = []

	# Committee roles that count as "leadership" for the cosponsor check below.
	leader_roles = (
		CommitteeMemberRole.ranking_member,
		CommitteeMemberRole.vice_chair,
		CommitteeMemberRole.chair,
	)

	for bill in bills:
		# Check if the bill was reported during this time period.
		for datestr, st, _text, _srcxml in bill.major_actions:
			# SECURITY: eval() executes whatever expression is stored in major_actions.
			# This is only acceptable while that data is written exclusively by trusted
			# code. NOTE(review): consider storing and parsing ISO date strings instead.
			date = eval(datestr)
			if isinstance(date, datetime.datetime): date = date.date()
			if startdate <= date <= enddate and st == BillStatus.reported:
				was_reported.append(bill)
				break # make sure not to double-count any bills in case of data errors

		# Check whether any cosponsors are on relevant committees.
		# Warning: Committee membership data is volatile, so re-running the stats may come out different.
		cosponsors = list(
			Cosponsor.objects.filter(bill=bill, joined__gte=startdate, joined__lte=enddate)
			.select_related("person", "role"))
		# Without cosponsors there can be no match, so skip the committee query entirely;
		# otherwise stop at the first committee leader found.
		if cosponsors and any(
			committee_membership.get(cosponsor.person.id, {}).get(committee.code) in leader_roles
			for committee in bill.committees.all()
			for cosponsor in cosponsors
		):
			has_cmte_leaders.append(bill)

		# Check whether there's a cosponsor from both parties.
		# Warning: If someone switches parties, this will change.
		parties = {cosponsor.role.party for cosponsor in cosponsors}
		if "Democrat" in parties and "Republican" in parties:
			has_cosponsors_both_parties.append(bill)

		# Check if a companion bill was introduced during the time period.
		if RelatedBill.objects.filter(
			bill=bill,
			relation="identical",
			related_bill__introduced_date__gte=startdate,
			related_bill__introduced_date__lte=enddate,
		).exists():
			has_companion.append(bill)

	stats["bills-reported"] = {
		"value": len(was_reported),
		"bills": make_bill_entries(was_reported),
	}

	stats["bills-with-committee-leaders"] = {
		"value": len(has_cmte_leaders),
		"bills": make_bill_entries(has_cmte_leaders),
	}

	stats["bills-with-cosponsors-both-parties-count"] = {
		"value": len(has_cosponsors_both_parties),
		"bills": make_bill_entries(has_cosponsors_both_parties),
		"num_bills": len(bills),
	}

	stats["bills-with-companion"] = {
		"value": len(has_companion),
		"other_chamber": RoleType.by_value(role.role_type).congress_chamber_other,
		"bills": make_bill_entries(has_companion),
	}
    # same position on the same day, record all of the roles
    # that were found for each office on this day.
    by_office.setdefault(p.get_office_id(), []).append(p)

  # IF any office had more than one term valid on the date,
  # skip this date.
  if len([0 for roles in by_office.values() if len(roles) > 1]) > 1:
    dates[i] = (date, { "error": "time ambiguity" })

  # Ok, count by party.
  else:
    by_party = { }
    for office_roles in by_office.values():
      role = office_roles[0]
      party = role.get_party_on_date(date)
      key = RoleType.by_value(role.role_type).key + " - " + party
      by_party[key] = by_party.get(key, 0) + 1
    dates[i] = (date, by_party)

# Get an ordering of columns: every key that appears in any date's
# counts, de-duplicated and sorted.
columns = sorted({key for _, counts in dates for key in counts})

# Emit the table as CSV on stdout: a header row, then one row per date
# with "-" wherever that date has no value for a column.
w = csv.writer(sys.stdout)
header = ["", "date"]
header.extend(columns)
w.writerow(header)
for i, (date, counts) in enumerate(dates):
  row = [i, date.isoformat()]
  row.extend(counts.get(c, "-") for c in columns)
  w.writerow(row)