def get_transparency_stats(person, role, stats, congress, startdate, enddate):
    """Record how many tracked "transparency" bills the person (co)sponsored.

    The list of tracked bills is read from ``analysis/transparency-bills.txt``
    (first whitespace-delimited token of each line is a bill id) and cached
    in the module-level ``transparency_bills`` list.

    Writes ``stats["transparency-bills"]`` with:
      * ``value``       -- 3 points per sponsored bill plus 1 per cosponsored bill
      * ``sponsored``   -- entries for tracked bills sponsored in the window
      * ``cosponsored`` -- entries for tracked bills cosponsored in the window
      * ``num_bills``   -- number of tracked bills in the role's chamber
      * ``chamber``     -- the chamber of the role
    """
    global transparency_bills

    # NOTE(review): the cache keeps only bills of the ``congress`` passed on
    # the first call and is re-read whenever it is empty; it is not keyed by
    # congress.
    if not transparency_bills:
        loaded = []
        # Use a context manager so the file handle is closed promptly.
        with open("analysis/transparency-bills.txt") as bill_list:
            for line in bill_list:
                bill_id = re.split(r"\s", line)[0]
                if not bill_id:
                    continue  # blank line or leading whitespace: nothing to parse
                bill = Bill.from_congressproject_id(bill_id)
                if bill.congress != congress:
                    continue
                loaded.append(bill)
        # Publish only after a complete read so that a failure part-way
        # through does not leave a truncated list cached.
        transparency_bills = loaded

    # which bills are in the right chamber?
    plausible_bills = [
        bill for bill in transparency_bills
        if BillType.by_value(bill.bill_type).chamber
        == RoleType.by_value(role.role_type).congress_chamber
    ]

    # did person sponsor any of these within this session?
    sponsored = [
        bill for bill in transparency_bills
        if startdate <= bill.introduced_date <= enddate and bill.sponsor == person
    ]

    # did person cosponsor any of these within this session?
    cosponsored = [
        cosp.bill
        for cosp in Cosponsor.objects.filter(
            person=person,
            bill__in=transparency_bills,
            joined__gte=startdate,
            joined__lte=enddate)
    ]

    stats["transparency-bills"] = {
        "value": len(sponsored)*3 + len(cosponsored),
        "sponsored": make_bill_entries(sponsored),
        "cosponsored": make_bill_entries(cosponsored),
        "num_bills": len(plausible_bills),
        "chamber": RoleType.by_value(role.role_type).congress_chamber,
    }
def get_sponsorship_analysis_stats(person, role, stats):
    """Copy the person's ideology and leadership scores into ``stats``.

    Looks up ``(role.role_type, person.id)`` in the module-level
    ``sponsorship_analysis_data`` mapping.  The record's first element is
    stored as ``stats["ideology"]["value"]`` and its second element as
    ``stats["leadership"]["value"]``; both are ``None`` when there is no
    (truthy) record.  Each entry also carries the role type's key.
    """
    record = sponsorship_analysis_data.get((role.role_type, person.id))

    for stat_name, position in (("ideology", 0), ("leadership", 1)):
        stats[stat_name] = {
            "value": record[position] if record else None,
            "role": RoleType.by_value(role.role_type).key,
        }
def fmt_role(r):
    """Summarise role ``r`` as a plain dict describing the person holding it.

    The dict has the person's id, display name, URL, the role type's key,
    the pronoun for the person's gender, and the result of
    ``get_key_vote`` for the person.
    """
    member = r.person
    summary = {}
    summary["id"] = member.id
    summary["name"] = member.name_and_title()
    summary["link"] = member.get_absolute_url()
    summary["type"] = RoleType.by_value(r.role_type).key
    summary["pronoun"] = Gender.by_value(member.gender).pronoun
    summary["key_vote"] = get_key_vote(member)
    return summary
def get_cohorts(person, role, congress, session, committee_membership):
    """Return the list of cohorts that ``person`` belongs to in ``role``.

    Each cohort is a dict with at least a ``"key"`` entry; some carry extra
    context (chamber, party, state, position, ...).  Cohorts produced here:
    chamber, party, House state delegation, chamber leadership,
    freshmen / sophomores / ten-years, committee leaders, and (for
    representatives) competitive vs. safe seat.

    ``committee_membership`` maps person id -> {committee code: role}.
    ``session`` selects the ``analysis/cook_competitive_seats-<session>.txt``
    file, whose parsed contents are cached in the module-level
    ``competitive_seats`` set.
    """
    global competitive_seats

    cohorts = []

    # chamber
    chamber = RoleType.by_value(role.role_type).congress_chamber.lower()
    cohorts.append({"key": chamber})

    # party
    cohorts.append({
        "key": "party-%s-%s" % (chamber, role.party.lower()),
        "chamber": chamber,
        "party": role.party,
    })

    # state delegation
    if role.role_type == RoleType.representative:
        cohorts.append({
            "key": "house-state-delegation-" + role.state.lower(),
            "state": role.state,
        })

    # chamber leadership position
    if role.leadership_title:
        cohorts.append({
            "key": chamber + "-leadership",
            "position": role.leadership_title,
        })

    # freshmen/sophomores
    min_start_date = None
    prev_congresses_served = set()
    # use enddate__lte=enddate to include the current role itself since for
    # senators their current role may span previous congresses
    for r in PersonRole.objects.filter(person=person, role_type=role.role_type, enddate__lte=role.enddate):
        if not min_start_date or r.startdate < min_start_date:
            min_start_date = r.startdate
        for c in r.congress_numbers():
            if c < congress:
                prev_congresses_served.add(c)
    if len(prev_congresses_served) == 0:
        cohorts.append({"key": chamber + "-freshmen", "chamber": chamber})
    if len(prev_congresses_served) == 1:
        cohorts.append({"key": chamber + "-sophomores", "chamber": chamber})
    if min_start_date and (role.enddate - min_start_date).days > 365.25*10:
        cohorts.append({
            "key": chamber + "-tenyears",
            "chamber": chamber,
            "first_date": min_start_date.isoformat(),
        })

    # committee leadership positions
    committee_positions = []
    for committee, committee_role in committee_membership.get(person.id, {}).items():
        if len(committee) != 4:
            continue  # exclude subcommittees
        if committee_role in (CommitteeMemberRole.ranking_member, CommitteeMemberRole.vice_chairman, CommitteeMemberRole.chairman):
            committee_positions.append((committee, committee_role))
    if len(committee_positions) > 0:
        cohorts.append({
            "key": chamber + "-committee-leaders",
            "chamber": chamber,
            "positions": committee_positions,
        })

    # safe vs competitive seats
    if role.role_type == RoleType.representative:
        # NOTE(review): the cache is filled from the file for the first
        # ``session`` seen and is not keyed by session afterwards.
        if competitive_seats is None:
            seats = set()
            # Context manager closes the file; the original left it open.
            with open("analysis/cook_competitive_seats-%s.txt" % session) as seats_file:
                for line in seats_file:
                    entry = line.strip()
                    if not entry or entry.startswith("#"):
                        continue  # skip blank lines and comments
                    (state, district) = entry.split(" ")[0].split("-")
                    seats.add((state, int(district)))
            # Assign only after a complete read so a parse error does not
            # leave a half-filled set cached.
            competitive_seats = seats
        if (role.state, role.district) in competitive_seats:
            cohorts.append({"key": chamber + "-competitive-seat"})
        else:
            cohorts.append({"key": chamber + "-safe-seat"})

    return cohorts
def get_vote_stats(person, role, stats, votes_this_year):
    """Record the share of votes the person missed.

    Counts the person's ``Voter`` records among
    ``votes_this_year[role.role_type]`` and how many of those have option
    key ``"0"`` (counted as missed).  Writes ``stats["missed-votes"]``; the
    percentage is rounded to three places and is ``None`` when there were no
    eligible votes.  Nothing is recorded for a role whose leadership title
    is "Speaker".
    """
    # Missed vote % in the chamber that the Member is currently serving in.
    if role.leadership_title == "Speaker":
        return

    eligible_qs = Voter.objects.filter(
        person=person,
        vote__in=votes_this_year[role.role_type])
    missed_qs = eligible_qs.filter(option__key="0")

    eligible_count = eligible_qs.count()
    missed_count = missed_qs.count()

    if eligible_count > 0:
        missed_pct = round(100.0*missed_count/eligible_count, 3)
    else:
        missed_pct = None

    stats["missed-votes"] = {
        "value": missed_pct,
        "elligible": eligible_count,
        "missed": missed_count,
        "role": RoleType.by_value(role.role_type).key,
    }
def get_cohorts(person, role, congress, session, committee_membership):
    """Return the list of cohorts that ``person`` belongs to in ``role``.

    Each cohort is a dict with at least a ``"key"`` entry.  Cohorts produced
    here: chamber, party, House state delegation, chamber leadership, and
    freshmen / sophomores / ten-years of service.

    ``session`` is compared against year strings such as ``"2014"`` to apply
    manual freshman/sophomore overrides for members who took office very
    late in a Congress.  ``committee_membership`` is accepted for interface
    compatibility but is not used by this version.
    """
    cohorts = []

    # chamber
    chamber = RoleType.by_value(role.role_type).congress_chamber.lower()
    cohorts.append({"key": chamber})

    # party
    cohorts.append({
        "key": "party-%s-%s" % (chamber, role.party.lower()),
        "chamber": chamber,
        "party": role.party,
    })

    # state delegation
    if role.role_type == RoleType.representative:
        cohorts.append({
            "key": "house-state-delegation-" + role.state.lower(),
            "state": role.state,
        })

    # chamber leadership position
    if role.leadership_title:
        cohorts.append({
            "key": chamber + "-leadership",
            "position": role.leadership_title,
        })

    # freshmen/sophomores
    min_start_date = None
    years_served = 0
    prev_congresses_served = set()
    today = datetime.datetime.now().date()  # hoisted: same for every role
    # use enddate__lte=enddate to include the current role itself since for
    # senators their current role may span previous congresses
    for r in PersonRole.objects.filter(person=person, role_type=role.role_type, enddate__lte=role.enddate):
        if not min_start_date or r.startdate < min_start_date:
            min_start_date = r.startdate
        # end dates for senators may be far in the future; round because
        # terms may be slightly less than a year
        years_served += round((min(r.enddate, today) - r.startdate).days / 365.25)
        for c in r.congress_numbers():
            if c < congress:
                prev_congresses_served.add(c)

    # Manual overrides for members who took office only in the closing weeks
    # of a Congress.  Each row is:
    #   (person ids, the Congress they joined late,
    #    sessions in which they are still treated as freshmen,
    #    sessions in which they are treated as sophomores)
    late_arrivals = (
        # Schatz served only a few days in the 112th Congress. Other members
        # took office within the last two months of the 112th Congress. For
        # the 2013 stats, I originally classified them as sophomores. I
        # revised it to drop that cohort from Schatz after hearing from his
        # office. For 2014 stats, I am classifying them all as freshmen
        # rather than sophomores; for 2015/2016 I am classifying them as
        # sophomores.
        ((412505, 412503, 412506, 412507), 112, ("2014",), ("2015", "2016")),
        # Similarly, Dave Brat (412605), Donald Norcross (412606), and Alma
        # Adams (412607) took office on 2014-11-12, but we'll treat them as
        # freshmen in the 114th Congress rather than sophomores.
        ((412605, 412606, 412607), 113, ("2015", "2016"), ("2017", "2018")),
        # Similarly, James Comer, Dwight Evans took office only after the
        # 2016 election, so we'll keep them as freshmen again in 2017, 2018
        # and then sophomores in 2019, 2020.
        # The original tuple listed 412676 twice, so the second member named
        # above was never matched.
        # NOTE(review): 412677 is believed to be Dwight Evans' id -- confirm.
        ((412676, 412677), 114, ("2017", "2018"), ("2019", "2020")),
        # Similarly, Kevin Hern, Joseph D. Morelle, Mary Gay Scanlon, Susan
        # Wild took office only after the 2018 election, so we'll keep them
        # as freshmen again in 2019, 2020 and then sophomores in 2021, 2022.
        ((412748, 412749, 412750, 412751), 115, ("2019", "2020"), ("2021", "2022")),
    )
    for person_ids, late_congress, freshman_sessions, sophomore_sessions in late_arrivals:
        if person.id not in person_ids:
            continue
        if session in freshman_sessions:
            assert prev_congresses_served == {late_congress}
            prev_congresses_served = set()
        elif session in sophomore_sessions:
            assert prev_congresses_served == {late_congress, late_congress + 1}
            prev_congresses_served = {late_congress + 1}

    if len(prev_congresses_served) == 0:
        cohorts.append({"key": chamber + "-freshmen", "chamber": chamber})
    if len(prev_congresses_served) == 1:
        cohorts.append({"key": chamber + "-sophomores", "chamber": chamber})
    if years_served >= 10:
        cohorts.append({
            "key": chamber + "-tenyears",
            "chamber": chamber,
            "first_date": min_start_date.isoformat(),
        })

    return cohorts
def get_sponsor_stats(person, role, stats, congress, startdate, enddate, committee_membership):
    """Compute statistics about bills the person sponsored in a time window.

    Considers bills with ``sponsor=person`` in ``congress`` introduced
    between ``startdate`` and ``enddate`` (inclusive) and writes these
    entries into ``stats``:

      * ``bills-introduced``
      * ``bills-enacted-ti``
      * ``bills-reported``
      * ``bills-with-committee-leaders``
      * ``bills-with-cosponsor-other-party``
      * ``bills-with-companion``

    ``committee_membership`` maps person id -> {committee code: role}.
    """
    # How many bills did the Member introduce during this time window?
    bills = Bill.objects.filter(
        sponsor=person,
        congress=congress,
        introduced_date__gte=startdate,
        introduced_date__lte=enddate)
    stats["bills-introduced"] = {
        "value": bills.count(),
    }

    # How many bills were "enacted" within this time window? Follow the was_enacted_ex logic
    # which includes text incorporation. Only use the date range on single-year stats because
    # a bill might be enacted after the end of a Congress. Don't count the bill itself if it became
    # the vehicle for passage of an unrelated matter, because the original sponsor has nothing to
    # do with that and shouldn't get credit. But do count it if it was incorporated into a bill
    # that was a vehicle for passage.
    def was_bill_enacted(b, startdate, enddate):
        if (enddate-startdate).days > 365*1.5:
            bs = b.was_enacted_ex()
        else:
            bs = b.was_enacted_ex(restrict_to_activity_in_date_range=(startdate, enddate))
        for b1 in (bs or []):
            if b1 == b and b1.original_intent_replaced:
                continue
            return True  # i.e. was enacted via any bill that isn't itself and itself was a vehicle
        return False

    # True when the given person holds a leadership role on the committee.
    def is_committee_leader(person_id, committee_code):
        return committee_membership.get(person_id, {}).get(committee_code) in (
            CommitteeMemberRole.ranking_member,
            CommitteeMemberRole.vice_chair,
            CommitteeMemberRole.chair)

    bills_enacted = [b for b in bills if was_bill_enacted(b, startdate, enddate)]
    stats["bills-enacted-ti"] = {
        "value": len(bills_enacted),
        "bills": make_bill_entries(bills_enacted),
    }

    # In order to test these remaining factors, we have to look more closely
    # at the bill because we can only use activities that occurred during the
    # time window so that if we re-run this script on the same window at a
    # later date nothing changes -- i.e. future activity on bills should
    # not affect 1st Session statistics. Mostly.
    bills = list(bills)
    was_reported = []
    has_cmte_leaders = []
    has_bipartisan_cosponsor = []
    has_companion = []
    for bill in bills:
        # Skip any bills that were the vehicle for passage of an unrelated matter
        # (and the original matter is gone).
        if bill.original_intent_replaced:
            continue

        # Check if the bill was reported during this time period.
        # We'll consider a bill reported if it had any major status change,
        # since a report per se doesn't always occur. Find the first
        # major status change and see if that occurred in the time
        # window of these stats.
        first_major_status_change = None
        for datestr, st, text, srcxml in bill.major_actions:
            if st != BillStatus.introduced:
                # SECURITY NOTE(review): eval() executes whatever expression
                # is stored in the action record. This is only acceptable if
                # major_actions is fully trusted data; consider storing and
                # parsing ISO dates instead.
                d = eval(datestr)
                if isinstance(d, datetime.datetime):
                    d = d.date()
                first_major_status_change = d
                break
        if first_major_status_change and (startdate <= first_major_status_change <= enddate):
            was_reported.append(bill)
        elif bill in bills_enacted:
            # Also give credit if the bill's provisions were enacted via
            # other legislation during this time period.
            was_reported.append(bill)

        # Check whether any cosponsors are on relevant committees.
        # Warning: Committee membership data is volatile, so re-running the stats may come out different.
        cosponsors = list(
            Cosponsor.objects.filter(bill=bill, joined__gte=startdate, joined__lte=enddate)
            .select_related("person", "role"))
        # any() stops at the first match instead of scanning every
        # committee/cosponsor pair after the answer is already known.
        if any(
            is_committee_leader(cosponsor.person.id, committee.code)
            for committee in bill.committees.all()
            for cosponsor in cosponsors
        ):
            has_cmte_leaders.append(bill)

        # Check whether there's a cosponsor from the opposite party of the sponsor.
        # For independents, use the party they caucus with.
        # Warning: If someone switches parties, this will change.
        sponsor_party = bill.sponsor_role.caucus or bill.sponsor_role.party
        assert sponsor_party in ("Republican", "Democrat")
        for cosponsor in cosponsors:
            cosponsor_party = cosponsor.role.caucus or cosponsor.role.party
            assert cosponsor_party in ("Republican", "Democrat")
            if cosponsor_party != sponsor_party:
                has_bipartisan_cosponsor.append(bill)
                break

        # Check if a companion bill was introduced during the time period.
        if RelatedBill.objects.filter(
                bill=bill,
                relation="identical",
                related_bill__introduced_date__gte=startdate,
                related_bill__introduced_date__lte=enddate).exists():
            has_companion.append(bill)

    stats["bills-reported"] = {
        "value": len(was_reported),
        "bills": make_bill_entries(was_reported),
    }

    stats["bills-with-committee-leaders"] = {
        "value": len(has_cmte_leaders),
        "bills": make_bill_entries(has_cmte_leaders),
    }

    stats["bills-with-cosponsor-other-party"] = {
        "value": len(has_bipartisan_cosponsor),
        "bills": make_bill_entries(has_bipartisan_cosponsor),
        "num_bills": len(bills),
    }

    stats["bills-with-companion"] = {
        "value": len(has_companion),
        "other_chamber": RoleType.by_value(role.role_type).congress_chamber_other,
        "bills": make_bill_entries(has_companion),
    }
def main(options):
    """
    Update Person and PersonRole models.
    Do safe update: touch only those records
    which have been changed.

    Reads the four YAML source files named in SRC_FILES from
    CONGRESS_LEGISLATORS_PATH, merges them into one record per GovTrack ID,
    then creates/updates Person rows and matches each person's terms against
    the existing PersonRole rows.

    Uses these attributes of ``options``:
      * ``force`` -- process even if no file changed, and re-save every
        record whether or not it changed.
      * ``disable_events`` -- when true, skip creating role events.

    Returns early (None) when no source file is reported as changed and
    ``options.force`` is not set.
    """
    BASE_PATH = CONGRESS_LEGISLATORS_PATH
    SRC_FILES = [
        'legislators-current', 'legislators-historical',
        'legislators-social-media', 'executive'
    ]  # order matters

    # Bail out unless at least one source file changed (or we are forced).
    for p in SRC_FILES:
        f = BASE_PATH + p + ".yaml"
        if not File.objects.is_changed(f) and not options.force:
            log.info('File %s was not changed' % f)
        else:
            # file modified...
            break
    else:
        # no 'break' ==> no files modified
        return

    # Start parsing.
    had_error = False

    # Get combined data.
    legislator_data = {}  # govtrack id -> merged YAML record
    leg_id_map = {}  # (id scheme, id value) -> govtrack id
    for p in SRC_FILES:
        log.info('Opening %s...' % p)
        f = BASE_PATH + p + ".yaml"
        y = yaml_load(f)
        for m in y:
            if p == "legislators-current":
                # We know all terms but the last are non-current and the last is.
                for r in m["terms"]:
                    r["current"] = False
                m["terms"][-1]["current"] = True
            elif p == "legislators-historical":
                # We know all terms are non-current.
                for r in m["terms"]:
                    r["current"] = False

            if p != 'legislators-social-media':
                govtrack_id = m["id"].get("govtrack")

                # For the benefit of the social media file, make a mapping of IDs.
                for k, v in m["id"].items():
                    if type(v) != list:
                        leg_id_map[(k, v)] = govtrack_id
            else:
                # GovTrack IDs are not always listed in this file.
                govtrack_id = None
                for k, v in m["id"].items():
                    if type(v) != list and (k, v) in leg_id_map:
                        govtrack_id = leg_id_map[(k, v)]
                        break

            if not govtrack_id:
                print("No GovTrack ID:")
                pprint.pprint(m)
                had_error = True
                continue

            # Merge: the first file to mention an ID supplies the base record;
            # the social-media and executive files add to it.
            if govtrack_id not in legislator_data:
                legislator_data[govtrack_id] = m
            elif p == "legislators-social-media":
                legislator_data[govtrack_id]["social"] = m["social"]
            elif p == "executive":
                legislator_data[govtrack_id]["terms"].extend(m["terms"])
            else:
                raise ValueError("Duplication in an unexpected way (%d, %s)."
                                 % (govtrack_id, p))

    person_processor = PersonProcessor()
    role_processor = PersonRoleProcessor()

    existing_persons = set(Person.objects.values_list('pk', flat=True))
    processed_persons = set()
    created_persons = set()

    progress = Progress(total=len(legislator_data))
    log.info('Processing persons')

    for node in legislator_data.values():
        # Wrap each iteration in try/except
        # so that if some node breaks the parsing process
        # then other nodes could be parsed
        try:
            person = person_processor.process(Person(), node)

            # Create cached name strings. This is done again later
            # after the roles are updated.
            person.set_names()

            # Now try to load the person with such ID from
            # database. If found it then just update it
            # else create new Person object
            try:
                ex_person = Person.objects.get(pk=person.pk)
                if person_processor.changed(ex_person, person) or options.force:
                    # If the person has PK of existing record,
                    # coming in via the YAML-specified GovTrack ID,
                    # then Django ORM will update existing record
                    if not options.force:
                        log.warn("Updated %s" % person)
                    person.save()
            except Person.DoesNotExist:
                created_persons.add(person.pk)
                person.save()
                log.warn("Created %s" % person)

            processed_persons.add(person.pk)

            # Parse all of the roles.
            new_roles = []
            for termnode in node['terms']:
                role = role_processor.process(PersonRole(), termnode)
                role.person = person
                role.extra = filter_yaml_term_structure(
                    termnode)  # copy in the whole YAML structure

                # Is this role current? For legislators, same as whether it came from
                # legislators-current, which eases Jan 3 transitions when we can't
                # distinguish by date.
                if "current" in termnode:
                    role.current = termnode["current"]

                # But executives...
                else:
                    now = datetime.now().date()
                    role.current = role.startdate <= now and role.enddate >= now
                    # Because of date overlaps at noon transition dates, ensure that only the last term that covers
                    # today is current --- reset past roles to not current. Doesn't handle turning off retiring people tho.
                    for r in new_roles:
                        r.current = False

                # Scan for most recent leadership role within the time period of this term,
                # which isn't great for Senators because it's likely it changed a few times
                # within a term, especially if there was a party switch.
                role.leadership_title = None
                for leadership_node in node.get("leadership_roles", []):
                    # must match on date and chamber
                    # (dates are compared as ISO-format strings)
                    if leadership_node["start"] >= role.enddate.isoformat():
                        continue  # might start on the same day but is for the next Congress
                    if "end" in leadership_node and leadership_node[
                            "end"] <= role.startdate.isoformat():
                        continue  # might start on the same day but is for the previous Congress
                    if leadership_node["chamber"] != RoleType.by_value(
                            role.role_type).congress_chamber.lower():
                        continue
                    role.leadership_title = leadership_node["title"]

                new_roles.append(role)

            # Try matching the new roles to existing db records. Since we don't have a primary key
            # in the source data, we have to match on the record values. But because of errors in data,
            # term start/end dates can change, so matching has to be a little fuzzy.
            existing_roles = list(PersonRole.objects.filter(person=person))
            matches = []

            # Pair up still-unmatched new/existing roles of the same type and state
            # for which rule(new_role, existing_role) is true. Matched roles are
            # removed from new_roles and existing_roles and appended to matches.
            def run_match_rule(rule):
                import itertools
                for new_role, existing_role in itertools.product(
                        new_roles, existing_roles):
                    if new_role not in new_roles or existing_role not in existing_roles:
                        continue  # already matched on a previous iteration
                    if new_role.role_type != existing_role.role_type:
                        continue
                    if new_role.state != existing_role.state:
                        continue
                    if rule(new_role, existing_role):
                        matches.append((new_role, existing_role))
                        new_roles.remove(new_role)
                        existing_roles.remove(existing_role)

            # First match exactly, then exact on just one date, then on contractions and expansions.
            run_match_rule(lambda new_role, existing_role: new_role.startdate == existing_role.startdate and new_role.enddate == existing_role.enddate)
            run_match_rule(lambda new_role, existing_role: new_role.startdate == existing_role.startdate or new_role.enddate == existing_role.enddate)
            run_match_rule(lambda new_role, existing_role: new_role.startdate >= existing_role.startdate and new_role.enddate <= existing_role.enddate)
            run_match_rule(lambda new_role, existing_role: new_role.startdate <= existing_role.startdate and new_role.enddate >= existing_role.enddate)

            # Update the database entries that correspond with records in the data file.
            did_update_any = False
            for new_role, existing_role in matches:
                # Reusing the existing id makes save() update that row.
                new_role.id = existing_role.id
                if role_processor.changed(existing_role, new_role) or options.force:
                    new_role.save()
                    did_update_any = True
                    if not options.force:
                        log.warn("Updated %s" % new_role)

            # If we have multiple records on disk that didn't match and multiple records in the database
            # that didn't match, then we don't know how to align them.
            if len(new_roles) > 0 and len(existing_roles) > 0:
                print(new_roles)
                print(existing_roles)
                raise Exception("There is an unmatched role.")

            # Otherwise if there are any unmatched new roles, we can just add them.
            for role in new_roles:
                log.warn("Created %s" % role)
                role.save()
                did_update_any = True

            # And likewise for any existing roles that are left over.
            for pr in existing_roles:
                print(pr.person.id, pr)
                # The raise makes the two statements below unreachable, so a
                # leftover role aborts this person instead of being deleted.
                raise ValueError("Deleted role??")
                log.warn("Deleted %s" % pr)
                pr.delete()

            if did_update_any and not options.disable_events:
                # Create the events for the roles after all have been loaded
                # because we don't create events for ends of terms and
                # starts of terms that are adjacent. Refresh the list to get
                # the roles in order.
                role_list = list(
                    PersonRole.objects.filter(
                        person=person).order_by('startdate'))
                for i in range(len(role_list)):
                    # Pass the previous and next role (None at either end).
                    role_list[i].create_events(
                        role_list[i - 1] if i > 0 else None,
                        role_list[i + 1] if i < len(role_list) - 1 else None)

            # The name can't be determined until all of the roles are set. If
            # it changes, re-save. Unfortunately roles are cached so this actually
            # doesn't work yet. Re-run the parser to fix names.
            nn = (person.name, person.sortname)
            if hasattr(person, "role"):
                delattr(person, "role")  # clear the cached info
            person._most_recent_role = None  # clear cache here too
            person.set_names()
            if nn != (person.name, person.sortname):
                log.warn("%s is now %s." % (nn[0], person.name))
                person.save()

        except Exception as ex:
            # Catch unexpected exceptions and log them
            pprint.pprint(node)
            log.error('', exc_info=ex)
            had_error = True

        progress.tick()

    log.info('Processed persons: %d' % len(processed_persons))
    log.info('Created persons: %d' % len(created_persons))

    if not had_error:
        # Remove persons which were not found in the source files
        removed_persons = existing_persons - processed_persons
        for pk in removed_persons:
            p = Person.objects.get(pk=pk)
            if p.roles.all().count() > 0:
                log.warn("Missing? Deleted? %d: %s" % (p.id, p))
            else:
                log.warn("Deleting... %d: %s (remember to prune_index!)" % (p.id, p))
                # The raise makes p.delete() unreachable: nothing is deleted.
                raise Exception("Won't delete!")
                p.delete()
        log.info('Missing/deleted persons: %d' % len(removed_persons))

        # Mark the files as processed.
        for p in SRC_FILES:
            f = BASE_PATH + p + ".yaml"
            File.objects.save_file(f)

    update_twitter_list()
def main(options):
    """
    Update Person and PersonRole models.
    Do safe update: touch only those records
    which have been changed.

    Reads the four YAML source files named in SRC_FILES from
    CONGRESS_LEGISLATORS_PATH, merges them into one record per GovTrack ID,
    then creates/updates Person rows and matches each term against the
    person's existing PersonRole rows.

    Uses these attributes of ``options``:
      * ``force`` -- process even if no file changed, and re-save every
        record whether or not it changed.
      * ``disable_events`` -- when true, skip creating role events.

    Returns early (None) when no source file is reported as changed and
    ``options.force`` is not set.

    Note: this version is written in Python 2 syntax (print statements,
    ``except Exception, ex``, ``xrange``).
    """
    BASE_PATH = CONGRESS_LEGISLATORS_PATH
    SRC_FILES = ['legislators-current', 'legislators-historical', 'legislators-social-media', 'executive'] # order matters

    # Bail out unless at least one source file changed (or we are forced).
    for p in SRC_FILES:
        f = BASE_PATH + p + ".yaml"
        if not File.objects.is_changed(f) and not options.force:
            log.info('File %s was not changed' % f)
        else:
            # file modified...
            break
    else:
        # no 'break' ==> no files modified
        return

    # Start parsing.
    had_error = False

    # Get combined data.
    legislator_data = { }  # govtrack id -> merged YAML record
    leg_id_map = { }  # (id scheme, id value) -> govtrack id
    for p in SRC_FILES:
        log.info('Opening %s...' % p)
        f = BASE_PATH + p + ".yaml"
        y = yaml_load(f)
        for m in y:
            if p != 'legislators-social-media':
                govtrack_id = m["id"].get("govtrack")

                # For the benefit of the social media file, make a mapping of IDs.
                for k, v in m["id"].items():
                    if type(v) != list:
                        leg_id_map[(k,v)] = govtrack_id
            else:
                # GovTrack IDs are not always listed in this file.
                govtrack_id = None
                for k, v in m["id"].items():
                    if type(v) != list and (k, v) in leg_id_map:
                        govtrack_id = leg_id_map[(k,v)]
                        break

            if not govtrack_id:
                print "No GovTrack ID:"
                pprint.pprint(m)
                had_error = True
                continue

            # Merge: the first file to mention an ID supplies the base record;
            # the social-media and executive files add to it.
            if govtrack_id not in legislator_data:
                legislator_data[govtrack_id] = m
            elif p == "legislators-social-media":
                legislator_data[govtrack_id]["social"] = m["social"]
            elif p == "executive":
                legislator_data[govtrack_id]["terms"].extend( m["terms"] )
            else:
                raise ValueError("Duplication in an unexpected way (%d, %s)."
                                 % (govtrack_id, p))

    person_processor = PersonProcessor()
    role_processor = PersonRoleProcessor()

    existing_persons = set(Person.objects.values_list('pk', flat=True))
    processed_persons = set()
    created_persons = set()

    progress = Progress(total=len(legislator_data))
    log.info('Processing persons')

    for node in legislator_data.values():
        # Wrap each iteration in try/except
        # so that if some node breaks the parsing process
        # then other nodes could be parsed
        try:
            person = person_processor.process(Person(), node)

            # Create cached name strings. This is done again later
            # after the roles are updated.
            person.set_names()

            # Now try to load the person with such ID from
            # database. If found it then just update it
            # else create new Person object
            try:
                ex_person = Person.objects.get(pk=person.pk)
                if person_processor.changed(ex_person, person) or options.force:
                    # If the person has PK of existing record,
                    # coming in via the YAML-specified GovTrack ID,
                    # then Django ORM will update existing record
                    if not options.force:
                        log.warn("Updated %s" % person)
                    person.save()
            except Person.DoesNotExist:
                created_persons.add(person.pk)
                person.save()
                log.warn("Created %s" % person)

            processed_persons.add(person.pk)

            # Process roles of the person
            roles = list(PersonRole.objects.filter(person=person))  # existing roles not yet matched
            existing_roles = set(PersonRole.objects.filter(person=person).values_list('pk', flat=True))
            processed_roles = set()
            # NOTE(review): role_list only receives roles that were saved in
            # this run (changed or newly created), so the previous/next
            # neighbours passed to create_events() below skip unchanged roles.
            role_list = []
            for role in node['terms']:
                role = role_processor.process(PersonRole(), role)
                role.person = person
                # A role is current when today falls within its date range.
                # (A further check that CURRENT_CONGRESS is among
                # role.congress_numbers() was left disabled here.)
                role.current = role.startdate <= datetime.now().date() and role.enddate >= datetime.now().date()

                # Scan for most recent leadership role within the time period of this term,
                # which isn't great for Senators because it's likely it changed a few times
                # within a term, especially if there was a party switch.
                role.leadership_title = None
                for leadership_node in node.get("leadership_roles", []):
                    # must match on date and chamber
                    # (dates are compared as ISO-format strings)
                    if leadership_node["start"] >= role.enddate.isoformat(): continue # might start on the same day but is for the next Congress
                    if "end" in leadership_node and leadership_node["end"] <= role.startdate.isoformat(): continue # might start on the same day but is for the previous Congress
                    if leadership_node["chamber"] != RoleType.by_value(role.role_type).congress_chamber.lower(): continue
                    role.leadership_title = leadership_node["title"]

                # Try to match this role with one already in the database.
                # First search for an exact match on type/start/end.
                ex_role = None
                for r in roles:
                    if role.role_type == r.role_type and r.startdate == role.startdate and r.enddate == role.enddate:
                        ex_role = r
                        break

                # Otherwise match on type/start only.
                if not ex_role:
                    for r in roles:
                        if role.role_type == r.role_type and r.startdate == role.startdate:
                            ex_role = r
                            break

                if ex_role:
                    # These roles correspond.
                    processed_roles.add(ex_role.id)
                    # Reusing the existing id makes save() update that row.
                    role.id = ex_role.id
                    if role_processor.changed(ex_role, role) or options.force:
                        role.save()
                        role_list.append(role)
                        if not options.force:
                            log.warn("Updated %s" % role)
                    roles.remove(ex_role) # don't need to try matching this to any other node
                else:
                    # Didn't find a matching role.
                    # If unmatched existing roles of the same type remain, we
                    # cannot tell whether this term is one of them: abort.
                    if len([r for r in roles if r.role_type == role.role_type]) > 0:
                        print role, "is one of these?"
                        for ex_role in roles:
                            print "\t", ex_role
                        raise Exception("There is an unmatched role.")
                    log.warn("Created %s" % role)
                    role.save()
                    role_list.append(role)

            # create the events for the roles after all have been loaded
            # because we don't create events for ends of terms and
            # starts of terms that are adjacent.
            if not options.disable_events:
                for i in xrange(len(role_list)):
                    # Pass the previous and next role (None at either end).
                    role_list[i].create_events(
                        role_list[i-1] if i > 0 else None,
                        role_list[i+1] if i < len(role_list)-1 else None
                        )

            # Existing roles that no term matched.
            removed_roles = existing_roles - processed_roles
            for pk in removed_roles:
                pr = PersonRole.objects.get(pk=pk)
                print pr.person.id, pr
                # The raise makes the two statements below unreachable, so a
                # leftover role aborts this person instead of being deleted.
                raise ValueError("Deleted role??")
                log.warn("Deleted %s" % pr)
                pr.delete()

            # The name can't be determined until all of the roles are set. If
            # it changes, re-save. Unfortunately roles are cached so this actually
            # doesn't work yet. Re-run the parser to fix names.
            nn = (person.name, person.sortname)
            if hasattr(person, "role"): delattr(person, "role") # clear the cached info
            person.set_names()
            if nn != (person.name, person.sortname):
                log.warn("%s is now %s." % (nn[0], person.name))
                person.save()

        except Exception, ex:
            # Catch unexpected exceptions and log them
            pprint.pprint(node)
            log.error('', exc_info=ex)
            had_error = True

        progress.tick()
def get_cohorts(person, role, congress, session, committee_membership):
    """Return the list of cohorts that ``person`` belongs to in ``role``.

    Each cohort is a dict with at least a ``"key"`` entry; some carry extra
    context (chamber, party, state, position, ...).  Cohorts produced here:
    chamber, party, House state delegation, chamber leadership,
    freshmen / sophomores / ten-years, committee leaders, and (for
    representatives) competitive vs. safe seat.

    ``committee_membership`` maps person id -> {committee code: role}.
    ``session`` selects the ``analysis/cook_competitive_seats-<session>.txt``
    file, whose parsed contents are cached in the module-level
    ``competitive_seats`` set.
    """
    global competitive_seats

    cohorts = []

    # chamber
    chamber = RoleType.by_value(role.role_type).congress_chamber.lower()
    cohorts.append({"key": chamber})

    # party
    cohorts.append({
        "key": "party-%s-%s" % (chamber, role.party.lower()),
        "chamber": chamber,
        "party": role.party
    })

    # state delegation
    if role.role_type == RoleType.representative:
        cohorts.append({
            "key": "house-state-delegation-" + role.state.lower(),
            "state": role.state
        })

    # chamber leadership position
    if role.leadership_title:
        cohorts.append({
            "key": chamber + "-leadership",
            "position": role.leadership_title
        })

    # freshmen/sophomores
    min_start_date = None
    prev_congresses_served = set()
    # use enddate__lte=enddate to include the current role itself since for
    # senators their current role may span previous congresses
    for r in PersonRole.objects.filter(person=person,
                                       role_type=role.role_type,
                                       enddate__lte=role.enddate):
        if not min_start_date or r.startdate < min_start_date:
            min_start_date = r.startdate
        for c in r.congress_numbers():
            if c < congress:
                prev_congresses_served.add(c)
    if len(prev_congresses_served) == 0:
        cohorts.append({"key": chamber + "-freshmen", "chamber": chamber})
    if len(prev_congresses_served) == 1:
        cohorts.append({"key": chamber + "-sophomores", "chamber": chamber})
    if min_start_date and (role.enddate - min_start_date).days > 365.25 * 10:
        cohorts.append({
            "key": chamber + "-tenyears",
            "chamber": chamber,
            "first_date": min_start_date.isoformat()
        })

    # committee leadership positions
    committee_positions = []
    for committee, committee_role in committee_membership.get(person.id, {}).items():
        if len(committee) != 4:
            continue  # exclude subcommittees
        if committee_role in (CommitteeMemberRole.ranking_member,
                              CommitteeMemberRole.vice_chairman,
                              CommitteeMemberRole.chairman):
            committee_positions.append((committee, committee_role))
    if len(committee_positions) > 0:
        cohorts.append({
            "key": chamber + "-committee-leaders",
            "chamber": chamber,
            "positions": committee_positions
        })

    # safe vs competitive seats
    if role.role_type == RoleType.representative:
        # NOTE(review): the cache is filled from the file for the first
        # ``session`` seen and is not keyed by session afterwards.
        if competitive_seats is None:
            loaded_seats = set()
            # Context manager closes the file; the original left it open.
            with open("analysis/cook_competitive_seats-%s.txt" % session) as fh:
                for raw_line in fh:
                    text = raw_line.strip()
                    if not text or text.startswith("#"):
                        continue  # skip blank lines and comments
                    (state, district) = text.split(" ")[0].split("-")
                    loaded_seats.add((state, int(district)))
            # Assign only after a complete read so a parse error does not
            # leave a half-filled set cached.
            competitive_seats = loaded_seats
        if (role.state, role.district) in competitive_seats:
            cohorts.append({"key": chamber + "-competitive-seat"})
        else:
            cohorts.append({"key": chamber + "-safe-seat"})

    return cohorts
def get_cohorts(person, role, congress, session, committee_membership):
    """Return the cohorts (peer groups) that a member's role belongs to.

    Each cohort is a dict with a "key" entry plus cohort-specific metadata.

    person -- the member; person.id is read and the object is passed to a
        PersonRole query.
    role -- the role being analyzed; role_type, party, state, district,
        leadership_title and enddate are read.
    congress -- Congress number; Congresses numbered below it count as
        previously served.
    session -- session label (compared against '2014' below) that is also
        interpolated into the competitive-seats file name.
    committee_membership -- mapping of person id to a mapping of committee
        code to CommitteeMemberRole value.

    Returns a list of cohort dicts.
    """
    global competitive_seats

    cohorts = []

    # chamber
    chamber = RoleType.by_value(role.role_type).congress_chamber.lower()
    cohorts.append({"key": chamber})

    # party
    cohorts.append({
        "key": "party-%s-%s" % (chamber, role.party.lower()),
        "chamber": chamber,
        "party": role.party,
    })

    # state delegation
    if role.role_type == RoleType.representative:
        cohorts.append({
            "key": "house-state-delegation-" + role.state.lower(),
            "state": role.state,
        })

    # chamber leadership position
    if role.leadership_title:
        cohorts.append({
            "key": chamber + "-leadership",
            "position": role.leadership_title,
        })

    # freshmen/sophomores
    min_start_date = None
    prev_congresses_served = set()
    # Filter on enddate__lte=role.enddate so the current role itself is
    # included: for senators the current role may span previous congresses.
    earlier_roles = PersonRole.objects.filter(
        person=person, role_type=role.role_type, enddate__lte=role.enddate)
    for r in earlier_roles:
        if not min_start_date or r.startdate < min_start_date:
            min_start_date = r.startdate
        for c in r.congress_numbers():
            if c < congress:
                prev_congresses_served.add(c)
    if person.id in (412505, 412503, 412506, 412507) and session == '2014':
        # Schatz served only a few days in the 112th Congress. Other members took office
        # within the last two months of the 112th Congress. For the 2013 stats, I originally
        # classified them as sophomores. I revised it to drop that cohort from Schatz after
        # hearing from his office. For 2014 stats, I am classifying them all as freshmen
        # rather than sophomores.
        assert prev_congresses_served == {112}
        prev_congresses_served = set()
    if not prev_congresses_served:
        cohorts.append({"key": chamber + "-freshmen", "chamber": chamber})
    if len(prev_congresses_served) == 1:
        cohorts.append({"key": chamber + "-sophomores", "chamber": chamber})
    if min_start_date and (role.enddate - min_start_date).days > 365.25 * 10:
        cohorts.append({
            "key": chamber + "-tenyears",
            "chamber": chamber,
            "first_date": min_start_date.isoformat(),
        })

    # committee leadership positions
    committee_positions = []
    for committee, committee_role in committee_membership.get(person.id, {}).items():
        if len(committee) != 4:
            continue  # exclude subcommittees
        if committee_role in (CommitteeMemberRole.ranking_member,
                              CommitteeMemberRole.vice_chairman,
                              CommitteeMemberRole.chairman):
            committee_positions.append((committee, committee_role))
    if committee_positions:
        cohorts.append({
            "key": chamber + "-committee-leaders",
            "chamber": chamber,
            "positions": committee_positions,
        })

    # safe vs competitive seats
    if role.role_type == RoleType.representative:
        if competitive_seats is None:
            # Parse into a local set and publish it only after the whole file
            # has been read, so a failed load cannot leave an empty or partial
            # cache behind that would silently mark every seat as safe.
            # NOTE(review): the cache is not keyed on `session`; a later call
            # with a different session reuses the first file's data.
            seats = set()
            with open("analysis/cook_competitive_seats-%s.txt" % session) as f:
                for line in f:
                    # Skip blank lines and comment lines.
                    if not line.strip() or line.startswith("#"):
                        continue
                    (state, district) = line.split(" ")[0].split("-")
                    seats.add((state, int(district)))
            competitive_seats = seats
        if (role.state, role.district) in competitive_seats:
            cohorts.append({"key": chamber + "-competitive-seat"})
        else:
            cohorts.append({"key": chamber + "-safe-seat"})

    return cohorts
def get_sponsor_stats(person, role, stats, congress, startdate, enddate, committee_membership):
    """Record statistics about bills sponsored by `person` into `stats`.

    Only activity dated between startdate and enddate (inclusive) is
    counted, so that re-running on the same window later gives the same
    numbers.

    person -- sponsor whose bills are examined.
    role -- only role.role_type is read, to name the other chamber.
    stats -- dict that is updated in place with the "bills-*" entries.
    congress -- Congress number the bills must belong to.
    startdate, enddate -- inclusive bounds of the time window.
    committee_membership -- mapping of person id to a mapping of committee
        code to CommitteeMemberRole value.

    Returns None.
    """

    def holds_committee_leadership(cosponsor, committee):
        # True when the cosponsor is a ranking member, vice chairman or
        # chairman of the given committee.
        position = committee_membership.get(cosponsor.person.id, {}).get(committee.code)
        return position in (CommitteeMemberRole.ranking_member,
                            CommitteeMemberRole.vice_chairman,
                            CommitteeMemberRole.chairman)

    # Bills the member introduced during the window.
    bills = Bill.objects.filter(
        sponsor=person,
        congress=congress,
        introduced_date__gte=startdate,
        introduced_date__lte=enddate)
    stats["bills-introduced"] = {"value": bills.count()}

    # Bills enacted within the window.
    bills_enacted = [
        b for b in bills
        if b.enacted_ex(restrict_to_activity_in_date_range=(
            startdate.isoformat(), enddate.isoformat()))
    ]
    stats["bills-enacted"] = {
        "value": len(bills_enacted),
        "bills": make_bill_entries(bills_enacted),
    }

    bills = list(bills)
    total_bills = len(bills)
    reported_bills = []
    leader_backed_bills = []
    bipartisan_count = 0
    companion_bills = []

    for bill in bills:
        # The remaining factors are derived from each bill's own activity,
        # restricted to the window, so that future activity on a bill does
        # not change statistics computed for an earlier session.

        # Was the bill reported during the window?
        for datestr, st, text, srcxml in bill.major_actions:
            # NOTE(review): eval() on stored data -- presumably a repr() of a
            # date/datetime; confirm this value is never externally supplied.
            when = eval(datestr)
            if isinstance(when, datetime.datetime):
                when = when.date()
            if startdate <= when <= enddate and st == BillStatus.reported:
                reported_bills.append(bill)
                break  # count each bill once even if the data repeats the action

        # Cosponsors who joined during the window.
        cosponsors = list(
            Cosponsor.objects
            .filter(bill=bill, joined__gte=startdate, joined__lte=enddate)
            .select_related("person", "role"))

        # Does any cosponsor lead a committee the bill was referred to?
        if any(holds_committee_leadership(cosponsor, committee)
               for committee in list(bill.committees.all())
               for cosponsor in cosponsors):
            leader_backed_bills.append(bill)

        # Is there at least one cosponsor from each major party?
        parties = set(cosponsor.role.party for cosponsor in cosponsors)
        if "Democrat" in parties and "Republican" in parties:
            bipartisan_count += 1

        # Was an identical (companion) bill introduced during the window?
        companions = RelatedBill.objects.filter(
            bill=bill,
            relation="identical",
            related_bill__introduced_date__gte=startdate,
            related_bill__introduced_date__lte=enddate)
        if companions.exists():
            companion_bills.append(bill)

    stats["bills-reported"] = {
        "value": len(reported_bills),
        "bills": make_bill_entries(reported_bills),
    }
    stats["bills-with-committee-leaders"] = {
        "value": len(leader_backed_bills),
        "bills": make_bill_entries(leader_backed_bills),
    }
    # The percentage is only recorded for members with more than ten bills.
    if total_bills > 10:
        stats["bills-with-cosponsors-both-parties"] = {
            "value": 100.0 * float(bipartisan_count) / float(total_bills),
            "num_bills": total_bills,
        }
    stats["bills-with-companion"] = {
        "value": len(companion_bills),
        "other_chamber": RoleType.by_value(role.role_type).congress_chamber_other,
        "bills": make_bill_entries(companion_bills),
    }
def main(options):
    """
    Update Person and PersonRole models from the congress-legislators YAML files.

    Do safe update: touch only those records which have been changed.

    options -- object whose `force` and `disable_events` attributes are read:
        `force` makes the run proceed even when no source file changed and
        saves every matched record; `disable_events` skips event creation.

    Returns None; returns early when no source file changed and
    options.force is not set.

    This function uses Python 2 syntax (print statements,
    `except Exception, ex`, xrange).
    """

    BASE_PATH = CONGRESS_LEGISLATORS_PATH
    SRC_FILES = ['legislators-current', 'legislators-historical', 'legislators-social-media', 'executive'] # order matters

    # Stop at the first file that changed (or immediately when forced); the
    # for/else returns when the loop finishes without finding one.
    for p in SRC_FILES:
        f = BASE_PATH + p + ".yaml"
        if not File.objects.is_changed(f) and not options.force:
            log.info('File %s was not changed' % f)
        else:
            # file modified...
            break
    else:
        # no 'break' ==> no files modified
        return

    # Start parsing.

    # NOTE(review): had_error (and the existing/processed/created person sets
    # below) are filled in but not read anywhere in the code shown here.
    had_error = False

    # Get combined data.
    legislator_data = { }  # GovTrack ID -> merged YAML record
    leg_id_map = { }  # (id scheme, id value) -> GovTrack ID
    for p in SRC_FILES:
        log.info('Opening %s...' % p)
        f = BASE_PATH + p + ".yaml"
        y = yaml_load(f)
        for m in y:
            if p == "legislators-current":
                # We know all terms but the last are non-current and the last is.
                for r in m["terms"]:
                    r["current"] = False
                m["terms"][-1]["current"] = True
            elif p == "legislators-historical":
                # We know all terms are non-current.
                for r in m["terms"]:
                    r["current"] = False

            if p != 'legislators-social-media':
                govtrack_id = m["id"].get("govtrack")

                # For the benefit of the social media file, make a mapping of IDs.
                for k, v in m["id"].items():
                    if type(v) != list:
                        leg_id_map[(k,v)] = govtrack_id
            else:
                # GovTrack IDs are not always listed in this file, so look the
                # record up through any other ID seen in the earlier files.
                govtrack_id = None
                for k, v in m["id"].items():
                    if type(v) != list and (k, v) in leg_id_map:
                        govtrack_id = leg_id_map[(k,v)]
                        break

            if not govtrack_id:
                print "No GovTrack ID:"
                pprint.pprint(m)
                had_error = True
                continue

            # Merge: the first record seen for an ID is kept; the social media
            # file adds a "social" entry and the executive file appends terms.
            if govtrack_id not in legislator_data:
                legislator_data[govtrack_id] = m
            elif p == "legislators-social-media":
                legislator_data[govtrack_id]["social"] = m["social"]
            elif p == "executive":
                legislator_data[govtrack_id]["terms"].extend( m["terms"] )
            else:
                raise ValueError("Duplication in an unexpected way (%d, %s)." % (govtrack_id, p))

    person_processor = PersonProcessor()
    role_processor = PersonRoleProcessor()

    existing_persons = set(Person.objects.values_list('pk', flat=True))
    processed_persons = set()
    created_persons = set()

    progress = Progress(total=len(legislator_data))
    log.info('Processing persons')

    for node in legislator_data.values():
        # Wrap each iteration in try/except so that if one node breaks
        # the parsing process, the other nodes can still be parsed.
        try:
            person = person_processor.process(Person(), node)

            # Create cached name strings. This is done again later
            # after the roles are updated.
            person.set_names()

            # Now try to load the person with this ID from the
            # database. If found, just update it; otherwise
            # create a new Person object.
            try:
                ex_person = Person.objects.get(pk=person.pk)
                if person_processor.changed(ex_person, person) or options.force:
                    # If the person has the PK of an existing record,
                    # coming in via the YAML-specified GovTrack ID,
                    # then the Django ORM will update the existing record.
                    if not options.force:
                        log.warn("Updated %s" % person)
                    person.save()

            except Person.DoesNotExist:
                created_persons.add(person.pk)
                person.save()
                log.warn("Created %s" % person)

            processed_persons.add(person.pk)

            # Parse all of the roles.
            new_roles = []
            for termnode in node['terms']:
                role = role_processor.process(PersonRole(), termnode)
                role.person = person
                role.extra = filter_yaml_term_structure(termnode) # copy in the whole YAML structure

                # Is this role current? For legislators, same as whether it came from
                # legislators-current, which eases Jan 3 transitions when we can't
                # distinguish by date.
                if "current" in termnode:
                    role.current = termnode["current"]

                # But executives...
                else:
                    now = datetime.now().date()
                    role.current = role.startdate <= now and role.enddate >= now
                    # Because of date overlaps at noon transition dates, ensure that only the last
                    # term that covers today is current --- reset past roles to not current.
                    # Doesn't handle turning off retiring people though.
                    # NOTE(review): nesting reconstructed -- this reset is assumed to belong to
                    # the executive (else) branch; confirm against version control.
                    for r in new_roles:
                        r.current = False

                # Scan for the most recent leadership role within the time period of this term,
                # which isn't great for Senators because it's likely it changed a few times
                # within a term, especially if there was a party switch.
                # (The last matching entry wins because each match overwrites the title.)
                role.leadership_title = None
                for leadership_node in node.get("leadership_roles", []):
                    # must match on date and chamber
                    if leadership_node["start"] >= role.enddate.isoformat(): continue # might start on the same day but is for the next Congress
                    if "end" in leadership_node and leadership_node["end"] <= role.startdate.isoformat(): continue # might start on the same day but is for the previous Congress
                    if leadership_node["chamber"] != RoleType.by_value(role.role_type).congress_chamber.lower(): continue
                    role.leadership_title = leadership_node["title"]

                new_roles.append(role)

            # Try matching the new roles to existing db records. Since we don't have a primary key
            # in the source data, we have to match on the record values. But because of errors in data,
            # term start/end dates can change, so matching has to be a little fuzzy.
            existing_roles = list(PersonRole.objects.filter(person=person))
            matches = []
            def run_match_rule(rule):
                # Pair up still-unmatched new/existing roles that share role_type and
                # state and satisfy `rule`; matched roles are removed from both lists.
                import itertools
                for new_role, existing_role in itertools.product(new_roles, existing_roles):
                    if new_role not in new_roles or existing_role not in existing_roles: continue # already matched on a previous iteration
                    if new_role.role_type != existing_role.role_type: continue
                    if new_role.state != existing_role.state: continue
                    if rule(new_role, existing_role):
                        matches.append((new_role, existing_role))
                        new_roles.remove(new_role)
                        existing_roles.remove(existing_role)

            # First match exactly, then exact on just one date, then on contractions and expansions.
            run_match_rule(lambda new_role, existing_role : new_role.startdate == existing_role.startdate and new_role.enddate == existing_role.enddate)
            run_match_rule(lambda new_role, existing_role : new_role.startdate == existing_role.startdate or new_role.enddate == existing_role.enddate)
            run_match_rule(lambda new_role, existing_role : new_role.startdate >= existing_role.startdate and new_role.enddate <= existing_role.enddate)
            run_match_rule(lambda new_role, existing_role : new_role.startdate <= existing_role.startdate and new_role.enddate >= existing_role.enddate)

            # Update the database entries that correspond with records in the data file.
            did_update_any = False
            for new_role, existing_role in matches:
                # Reusing the existing id makes save() update that row.
                new_role.id = existing_role.id
                if role_processor.changed(existing_role, new_role) or options.force:
                    new_role.save()
                    did_update_any = True
                    if not options.force:
                        log.warn("Updated %s" % new_role)

            # If we have multiple records on disk that didn't match and multiple records in the database
            # that didn't match, then we don't know how to align them.
            if len(new_roles) > 0 and len(existing_roles) > 0:
                print(new_roles)
                print(existing_roles)
                raise Exception("There is an unmatched role.")

            # Otherwise if there are any unmatched new roles, we can just add them.
            for role in new_roles:
                log.warn("Created %s" % role)
                role.save()
                did_update_any = True

            # And likewise for any existing roles that are left over.
            # NOTE(review): the raise below fires on the first leftover role, so the
            # log/delete lines after it never run -- presumably a deliberate guard
            # against deleting roles; confirm.
            for pr in existing_roles:
                print pr.person.id, pr
                raise ValueError("Deleted role??")
                log.warn("Deleted %s" % pr)
                pr.delete()

            if did_update_any and not options.disable_events:
                # Create the events for the roles after all have been loaded
                # because we don't create events for ends of terms and
                # starts of terms that are adjacent. Refresh the list to get
                # the roles in order.
                role_list = list(PersonRole.objects.filter(person=person).order_by('startdate'))
                for i in xrange(len(role_list)):
                    # Pass the neighbouring roles (or None at either end).
                    role_list[i].create_events(
                        role_list[i-1] if i > 0 else None,
                        role_list[i+1] if i < len(role_list)-1 else None
                        )

            # The name can't be determined until all of the roles are set. If
            # it changes, re-save. Unfortunately roles are cached so this actually
            # doesn't work yet. Re-run the parser to fix names.
            nn = (person.name, person.sortname)
            if hasattr(person, "role"): delattr(person, "role") # clear the cached info
            person.set_names()
            if nn != (person.name, person.sortname):
                log.warn("%s is now %s." % (nn[0], person.name))
                person.save()

        except Exception, ex:
            # Catch unexpected exceptions and log them
            pprint.pprint(node)
            log.error('', exc_info=ex)
            had_error = True

        progress.tick()
def get_sponsor_stats(person, role, stats, congress, startdate, enddate, committee_membership):
    """Record statistics about bills sponsored by `person` into `stats`.

    person -- sponsor whose bills are examined.
    role -- only role.role_type is read, to name the other chamber.
    stats -- dict that is updated in place with the "bills-*" entries.
    congress -- Congress number the bills must belong to.
    startdate, enddate -- inclusive bounds of the time window.
    committee_membership -- mapping of person id to a mapping of committee
        code to CommitteeMemberRole value.

    Returns None.

    Raises ValueError when the member has at least one bill in the window
    and the window is not longer than 1.5 years (see was_bill_enacted).
    """
    # How many bills did the Member introduce during this time window?
    bills = Bill.objects.filter(sponsor=person, congress=congress,
        introduced_date__gte=startdate, introduced_date__lte=enddate)
    stats["bills-introduced"] = {
        "value": bills.count(),
    }

    # How many bills were "enacted" within this time window? Follow the was_enacted_ex logic
    # which includes text incorporation.
    def was_bill_enacted(b, startdate, enddate):
        # Windows longer than 1.5 years are evaluated without a date
        # restriction because a bill might be enacted after the end of a
        # Congress. Shorter windows are rejected: a date-restricted call,
        # b.was_enacted_ex(restrict_to_activity_in_date_range=(start, end)),
        # used to follow the raise but could never execute, so it was removed.
        if (enddate - startdate).days > 365 * 1.5:
            return b.was_enacted_ex()
        raise ValueError(
            "enacted-bill statistics are only computed for date ranges longer "
            "than 1.5 years (got %s to %s)"
            % (startdate.isoformat(), enddate.isoformat()))

    bills_enacted = [b for b in bills if was_bill_enacted(b, startdate, enddate)]
    stats["bills-enacted-ti"] = {
        "value": len(bills_enacted),
        "bills": make_bill_entries(bills_enacted),
    }

    # In order to test these remaining factors, we have to look more closely
    # at the bill because we can only use activities that occurred during the
    # time window so that if we re-run this script on the same window at a
    # later date nothing changes -- i.e. future activity on bills should
    # not affect 1st Session statistics. Mostly.
    bills = list(bills)
    was_reported = []
    has_cmte_leaders = []
    has_cosponsors_both_parties = []
    has_companion = []
    for bill in bills:
        # Check if the bill was reported during this time period.
        for datestr, st, text, srcxml in bill.major_actions:
            # NOTE(review): eval() on stored data -- presumably a repr() of a
            # date/datetime; confirm this value is never externally supplied.
            date = eval(datestr)
            if isinstance(date, datetime.datetime):
                date = date.date()
            if date >= startdate and date <= enddate and st == BillStatus.reported:
                was_reported.append(bill)
                break  # make sure not to double-count any bills in case of data errors

        # Check whether any cosponsors are on relevant committees.
        # Warning: Committee membership data is volatile, so re-running the stats may come out different.
        cosponsors = list(Cosponsor.objects.filter(bill=bill, joined__gte=startdate, joined__lte=enddate).select_related("person", "role"))
        x = False
        for committee in list(bill.committees.all()):
            for cosponsor in cosponsors:
                if committee_membership.get(cosponsor.person.id, {}).get(committee.code) in (
                        CommitteeMemberRole.ranking_member,
                        CommitteeMemberRole.vice_chairman,
                        CommitteeMemberRole.chairman):
                    x = True
        if x:
            has_cmte_leaders.append(bill)

        # Check whether there's a cosponsor from both parties.
        # Warning: If someone switches parties, this will change.
        co_d = False
        co_r = False
        for cosponsor in cosponsors:
            if cosponsor.role.party == "Democrat":
                co_d = True
            if cosponsor.role.party == "Republican":
                co_r = True
        if co_d and co_r:
            has_cosponsors_both_parties.append(bill)

        # Check if a companion bill was introduced during the time period.
        if RelatedBill.objects.filter(bill=bill, relation="identical",
                related_bill__introduced_date__gte=startdate,
                related_bill__introduced_date__lte=enddate).exists():
            has_companion.append(bill)

    stats["bills-reported"] = {
        "value": len(was_reported),
        "bills": make_bill_entries(was_reported),
    }

    stats["bills-with-committee-leaders"] = {
        "value": len(has_cmte_leaders),
        "bills": make_bill_entries(has_cmte_leaders),
    }

    stats["bills-with-cosponsors-both-parties-count"] = {
        "value": len(has_cosponsors_both_parties),
        "bills": make_bill_entries(has_cosponsors_both_parties),
        "num_bills": len(bills),
    }

    stats["bills-with-companion"] = {
        "value": len(has_companion),
        "other_chamber": RoleType.by_value(role.role_type).congress_chamber_other,
        "bills": make_bill_entries(has_companion),
    }
def get_cohorts(person, role, congress, session, committee_membership):
    """Return the cohorts (peer groups) that a member's role belongs to.

    Each cohort is a dict with a "key" entry plus cohort-specific metadata.

    person -- the member; person.id is read and the object is passed to a
        PersonRole query.
    role -- the role being analyzed; role_type, party, state, district,
        leadership_title and enddate are read.
    congress -- Congress number; Congresses numbered below it count as
        previously served.
    session -- session label (compared against year strings such as "2014"
        below) that is also interpolated into the competitive-seats file name.
    committee_membership -- mapping of person id to a mapping of committee
        code to CommitteeMemberRole value.

    Returns a list of cohort dicts.
    """
    global competitive_seats

    cohorts = []

    # chamber
    chamber = RoleType.by_value(role.role_type).congress_chamber.lower()
    cohorts.append({"key": chamber})

    # party
    cohorts.append({
        "key": "party-%s-%s" % (chamber, role.party.lower()),
        "chamber": chamber,
        "party": role.party,
    })

    # state delegation
    if role.role_type == RoleType.representative:
        cohorts.append({
            "key": "house-state-delegation-" + role.state.lower(),
            "state": role.state,
        })

    # chamber leadership position
    if role.leadership_title:
        cohorts.append({
            "key": chamber + "-leadership",
            "position": role.leadership_title,
        })

    # freshmen/sophomores
    min_start_date = None
    prev_congresses_served = set()
    # Filter on enddate__lte=role.enddate so the current role itself is
    # included: for senators the current role may span previous congresses.
    earlier_roles = PersonRole.objects.filter(
        person=person, role_type=role.role_type, enddate__lte=role.enddate)
    for r in earlier_roles:
        if not min_start_date or r.startdate < min_start_date:
            min_start_date = r.startdate
        for c in r.congress_numbers():
            if c < congress:
                prev_congresses_served.add(c)
    if person.id in (412505, 412503, 412506, 412507):
        # Schatz served only a few days in the 112th Congress. Other members took office
        # within the last two months of the 112th Congress. For the 2013 stats, I originally
        # classified them as sophomores. I revised it to drop that cohort from Schatz after
        # hearing from his office. For 2014 stats, I am classifying them all as freshmen
        # rather than sophomores; for 2015/2016 I am classifying them as sophomores.
        if session == "2014":
            assert prev_congresses_served == {112}
            prev_congresses_served = set()
        elif session in ("2015", "2016"):
            assert prev_congresses_served == {112, 113}
            prev_congresses_served = {113}
    if person.id in (412605, 412606, 412607):
        # Similarly, Dave Brat (412605), Donald Norcross (412606), and Alma Adams (412607)
        # took office on 2014-11-12, but we'll treat them as freshmen in the 114th Congress
        # rather than sophomores.
        if session in ("2015", "2016"):
            assert prev_congresses_served == {113}
            prev_congresses_served = set()
        elif session in ("2017", "2018"):
            assert prev_congresses_served == {113, 114}
            prev_congresses_served = {114}
    if not prev_congresses_served:
        cohorts.append({"key": chamber + "-freshmen", "chamber": chamber})
    if len(prev_congresses_served) == 1:
        cohorts.append({"key": chamber + "-sophomores", "chamber": chamber})
    if min_start_date and (role.enddate - min_start_date).days > 365.25 * 10:
        cohorts.append({
            "key": chamber + "-tenyears",
            "chamber": chamber,
            "first_date": min_start_date.isoformat(),
        })

    # committee leadership positions
    committee_positions = []
    for committee, committee_role in committee_membership.get(person.id, {}).items():
        if len(committee) != 4:
            continue  # exclude subcommittees
        if committee_role in (CommitteeMemberRole.ranking_member,
                              CommitteeMemberRole.vice_chairman,
                              CommitteeMemberRole.chairman):
            committee_positions.append((committee, committee_role))
    if committee_positions:
        cohorts.append({
            "key": chamber + "-committee-leaders",
            "chamber": chamber,
            "positions": committee_positions,
        })

    # safe vs competitive seats
    if role.role_type == RoleType.representative:
        if competitive_seats is None:
            # Parse into a local set and publish it only after the whole file
            # has been read, so a failed load cannot leave an empty or partial
            # cache behind that would silently mark every seat as safe.
            # NOTE(review): the cache is not keyed on `session`; a later call
            # with a different session reuses the first file's data.
            seats = set()
            with open("analysis/cook_competitive_seats-%s.txt" % session) as f:
                for line in f:
                    # Skip blank lines and comment lines.
                    if not line.strip() or line.startswith("#"):
                        continue
                    (state, district) = line.split(" ")[0].split("-")
                    seats.add((state, int(district)))
            competitive_seats = seats
        if (role.state, role.district) in competitive_seats:
            cohorts.append({"key": chamber + "-competitive-seat"})
        else:
            cohorts.append({"key": chamber + "-safe-seat"})

    return cohorts
def get_sponsor_stats(person, role, stats, congress, startdate, enddate, committee_membership):
    """Record statistics about bills sponsored by `person` into `stats`.

    Only activity dated between startdate and enddate (inclusive) is
    counted, so that re-running on the same window later gives the same
    numbers.

    person -- sponsor whose bills are examined.
    role -- only role.role_type is read, to name the other chamber.
    stats -- dict that is updated in place with the "bills-*" entries.
    congress -- Congress number the bills must belong to.
    startdate, enddate -- inclusive bounds of the time window.
    committee_membership -- mapping of person id to a mapping of committee
        code to CommitteeMemberRole value.

    Returns None.
    """

    def holds_committee_leadership(cosponsor, committee):
        # True when the cosponsor is a ranking member, vice chairman or
        # chairman of the given committee.
        position = committee_membership.get(cosponsor.person.id, {}).get(committee.code)
        return position in (CommitteeMemberRole.ranking_member,
                            CommitteeMemberRole.vice_chairman,
                            CommitteeMemberRole.chairman)

    # Bills the member introduced during the window.
    bills = Bill.objects.filter(
        sponsor=person,
        congress=congress,
        introduced_date__gte=startdate,
        introduced_date__lte=enddate)
    stats["bills-introduced"] = {"value": bills.count()}

    # Bills enacted within the window.
    bills_enacted = [
        b for b in bills
        if b.was_enacted_ex(restrict_to_activity_in_date_range=(
            startdate.isoformat(), enddate.isoformat()))
    ]
    stats["bills-enacted"] = {
        "value": len(bills_enacted),
        "bills": make_bill_entries(bills_enacted),
    }

    bills = list(bills)
    total_bills = len(bills)
    reported_bills = []
    leader_backed_bills = []
    bipartisan_count = 0
    companion_bills = []

    for bill in bills:
        # The remaining factors are derived from each bill's own activity,
        # restricted to the window, so that future activity on a bill does
        # not change statistics computed for an earlier session.

        # Was the bill reported during the window?
        for datestr, st, text, srcxml in bill.major_actions:
            # NOTE(review): eval() on stored data -- presumably a repr() of a
            # date/datetime; confirm this value is never externally supplied.
            when = eval(datestr)
            if isinstance(when, datetime.datetime):
                when = when.date()
            if startdate <= when <= enddate and st == BillStatus.reported:
                reported_bills.append(bill)
                break  # count each bill once even if the data repeats the action

        # Cosponsors who joined during the window.
        cosponsors = list(
            Cosponsor.objects
            .filter(bill=bill, joined__gte=startdate, joined__lte=enddate)
            .select_related("person", "role"))

        # Does any cosponsor lead a committee the bill was referred to?
        if any(holds_committee_leadership(cosponsor, committee)
               for committee in list(bill.committees.all())
               for cosponsor in cosponsors):
            leader_backed_bills.append(bill)

        # Is there at least one cosponsor from each major party?
        parties = set(cosponsor.role.party for cosponsor in cosponsors)
        if "Democrat" in parties and "Republican" in parties:
            bipartisan_count += 1

        # Was an identical (companion) bill introduced during the window?
        companions = RelatedBill.objects.filter(
            bill=bill,
            relation="identical",
            related_bill__introduced_date__gte=startdate,
            related_bill__introduced_date__lte=enddate)
        if companions.exists():
            companion_bills.append(bill)

    stats["bills-reported"] = {
        "value": len(reported_bills),
        "bills": make_bill_entries(reported_bills),
    }
    stats["bills-with-committee-leaders"] = {
        "value": len(leader_backed_bills),
        "bills": make_bill_entries(leader_backed_bills),
    }
    # The percentage is only recorded for members with more than ten bills.
    if total_bills > 10:
        stats["bills-with-cosponsors-both-parties"] = {
            "value": 100.0 * float(bipartisan_count) / float(total_bills),
            "num_bills": total_bills,
        }
    stats["bills-with-companion"] = {
        "value": len(companion_bills),
        "other_chamber": RoleType.by_value(role.role_type).congress_chamber_other,
        "bills": make_bill_entries(companion_bills),
    }
def get_sponsor_stats(person, role, stats, congress, startdate, enddate, committee_membership):
    """Record statistics about bills sponsored by `person` into `stats`.

    person -- sponsor whose bills are examined.
    role -- only role.role_type is read, to name the other chamber.
    stats -- dict that is updated in place with the "bills-*" entries.
    congress -- Congress number the bills must belong to.
    startdate, enddate -- inclusive bounds of the time window.
    committee_membership -- mapping of person id to a mapping of committee
        code to CommitteeMemberRole value.

    Returns None.

    Raises ValueError when the member has at least one bill in the window
    and the window is not longer than 1.5 years (see was_bill_enacted).
    """
    # How many bills did the Member introduce during this time window?
    bills = Bill.objects.filter(sponsor=person, congress=congress,
        introduced_date__gte=startdate, introduced_date__lte=enddate)
    stats["bills-introduced"] = {
        "value": bills.count(),
    }

    # How many bills were "enacted" within this time window? Follow the was_enacted_ex logic
    # which includes text incorporation.
    def was_bill_enacted(b, startdate, enddate):
        # Windows longer than 1.5 years are evaluated without a date
        # restriction because a bill might be enacted after the end of a
        # Congress. Shorter windows are rejected: a date-restricted call,
        # b.was_enacted_ex(restrict_to_activity_in_date_range=(start, end)),
        # used to follow the raise but could never execute, so it was removed.
        if (enddate - startdate).days > 365 * 1.5:
            return b.was_enacted_ex()
        raise ValueError(
            "enacted-bill statistics are only computed for date ranges longer "
            "than 1.5 years (got %s to %s)"
            % (startdate.isoformat(), enddate.isoformat()))

    bills_enacted = [b for b in bills if was_bill_enacted(b, startdate, enddate)]
    stats["bills-enacted-ti"] = {
        "value": len(bills_enacted),
        "bills": make_bill_entries(bills_enacted),
    }

    # In order to test these remaining factors, we have to look more closely
    # at the bill because we can only use activities that occurred during the
    # time window so that if we re-run this script on the same window at a
    # later date nothing changes -- i.e. future activity on bills should
    # not affect 1st Session statistics. Mostly.
    bills = list(bills)
    was_reported = []
    has_cmte_leaders = []
    has_cosponsors_both_parties = []
    has_companion = []
    for bill in bills:
        # Check if the bill was reported during this time period.
        for datestr, st, text, srcxml in bill.major_actions:
            # NOTE(review): eval() on stored data -- presumably a repr() of a
            # date/datetime; confirm this value is never externally supplied.
            date = eval(datestr)
            if isinstance(date, datetime.datetime):
                date = date.date()
            if date >= startdate and date <= enddate and st == BillStatus.reported:
                was_reported.append(bill)
                break  # make sure not to double-count any bills in case of data errors

        # Check whether any cosponsors are on relevant committees.
        # Warning: Committee membership data is volatile, so re-running the stats may come out different.
        cosponsors = list(Cosponsor.objects.filter(bill=bill, joined__gte=startdate, joined__lte=enddate).select_related("person", "role"))
        x = False
        for committee in list(bill.committees.all()):
            for cosponsor in cosponsors:
                if committee_membership.get(cosponsor.person.id, {}).get(committee.code) in (
                        CommitteeMemberRole.ranking_member,
                        CommitteeMemberRole.vice_chair,
                        CommitteeMemberRole.chair):
                    x = True
        if x:
            has_cmte_leaders.append(bill)

        # Check whether there's a cosponsor from both parties.
        # Warning: If someone switches parties, this will change.
        co_d = False
        co_r = False
        for cosponsor in cosponsors:
            if cosponsor.role.party == "Democrat":
                co_d = True
            if cosponsor.role.party == "Republican":
                co_r = True
        if co_d and co_r:
            has_cosponsors_both_parties.append(bill)

        # Check if a companion bill was introduced during the time period.
        if RelatedBill.objects.filter(bill=bill, relation="identical",
                related_bill__introduced_date__gte=startdate,
                related_bill__introduced_date__lte=enddate).exists():
            has_companion.append(bill)

    stats["bills-reported"] = {
        "value": len(was_reported),
        "bills": make_bill_entries(was_reported),
    }

    stats["bills-with-committee-leaders"] = {
        "value": len(has_cmte_leaders),
        "bills": make_bill_entries(has_cmte_leaders),
    }

    stats["bills-with-cosponsors-both-parties-count"] = {
        "value": len(has_cosponsors_both_parties),
        "bills": make_bill_entries(has_cosponsors_both_parties),
        "num_bills": len(bills),
    }

    stats["bills-with-companion"] = {
        "value": len(has_companion),
        "other_chamber": RoleType.by_value(role.role_type).congress_chamber_other,
        "bills": make_bill_entries(has_companion),
    }
# same position on the same day, record all of the roles # that were found for each office on this day. by_office.setdefault(p.get_office_id(), []).append(p) # IF any office had more than one term valid on the date, # skip this date. if len([0 for roles in by_office.values() if len(roles) > 1]) > 1: dates[i] = (date, { "error": "time ambiguity" }) # Ok, count by party. else: by_party = { } for office_roles in by_office.values(): role = office_roles[0] party = role.get_party_on_date(date) key = RoleType.by_value(role.role_type).key + " - " + party by_party[key] = by_party.get(key, 0) + 1 dates[i] = (date, by_party) # Get an ordering of columns. columns = sorted(set( sum([list(counts.keys()) for date, counts in dates], []) )) w = csv.writer(sys.stdout) w.writerow(["", "date"] + columns) for i, (date, counts) in enumerate(dates): row = [i, date.isoformat()] for c in columns: row.append(counts.get(c, "-")) w.writerow(row)