def get_expected_districts(settings, abbr):
    """
    Build the expected seat count per district for one state.

    The result maps a chamber key ("legislature" for a unicameral chamber,
    otherwise the chamber's own ``chamber_type``) to a dict of
    ``{district name: number of seats}``.  Every still-open vacancy listed
    under ``settings[abbr]["vacancies"]`` removes one seat from its district.

    Raises ``BadVacancy`` as soon as a vacancy whose ``vacant_until`` date is
    no longer in the future is encountered.
    """
    state = metadata.lookup(abbr=abbr)

    expected = {}
    for chamber in state.chambers:
        if chamber.chamber_type == "unicameral":
            chamber_key = "legislature"
        else:
            chamber_key = chamber.chamber_type
        seat_counts = {}
        for district in chamber.districts:
            seat_counts[district.name] = district.num_seats
        expected[chamber_key] = seat_counts

    # remove vacancies
    vacancies = settings.get(abbr, {}).get("vacancies", [])
    if vacancies:
        click.secho(f"Processing {len(vacancies)} vacancies:")

    for vacancy in vacancies:
        still_vacant = datetime.date.today() < vacancy["vacant_until"]
        if not still_vacant:
            # stale entry: tell the user to clean up the settings and stop
            click.secho(
                "\t{chamber}-{district} expired {vacant_until} remove & re-run".format(**vacancy),
                fg="red",
            )
            raise BadVacancy()

        district_key = str(vacancy["district"])
        expected[vacancy["chamber"]][district_key] -= 1
        click.secho(
            "\t{chamber}-{district} (until {vacant_until})".format(**vacancy),
            fg="yellow",
        )

    return expected
def __new__(cls, name, bases, dct):
    """
    Create the class and, for everything except the class named "State",
    tag it as a government and attach its Open States metadata.

    The metadata is looked up by class name, after the name has been passed
    through ``_name_fixes`` (names without an entry are used unchanged).
    """
    new_class = super().__new__(cls, name, bases, dct)
    if name == "State":
        # the "State" class itself gets no classification/metadata
        return new_class

    new_class.classification = "government"
    # while we're here, load the metadata (formerly on a cached property)
    lookup_name = _name_fixes.get(name, name)
    new_class.metadata = openstates_metadata.lookup(name=lookup_name)
    return new_class
def merge_ids(geojson_path):
    """
    Rewrite the properties of every feature in a GeoJSON file and save the
    result under ``data/geojson/``.

    For each feature (unless its GEOID is listed in ``SKIPPED_GEOIDS``, in
    which case it is left untouched) the Census properties are replaced with
    ``ocdid``, ``type``, ``state`` and ``name``.

    The output filename is derived from the district type and state of the
    last feature the loop looked at: ``us-cd.geojson`` for congressional
    districts, ``<state>-<type>.geojson`` otherwise.

    Raises:
        AssertionError: no row in ``ocd_ids`` matches the feature's GEOID.
        ValueError: the state metadata has no district for the OCD ID.
    """
    with open(geojson_path, "r") as geojson_file:
        geojson = json.load(geojson_file)

    for feature in geojson["features"]:
        district_type = MTFCC_MAPPING[feature["properties"]["MTFCC"]]

        # Identify the OCD ID by making a lookup against the CSV files
        # The OCD ID is the canonical identifier of an area on
        # the Open States platform
        geoid = "{}-{}".format(district_type, feature["properties"]["GEOID"])

        if geoid in SKIPPED_GEOIDS:
            continue

        for row in ocd_ids:
            if row["census_geoid"] == geoid:
                ocd_id = row["id"]
                break
        else:
            print(feature["properties"])
            raise AssertionError("Could not find OCD ID for GEOID {}".format(geoid))

        # Although OCD IDs contain the state postal code, parsing
        # an ID to determine structured data is bad practice,
        # so add a standalone state postal abbreviation property too
        state = us.states.lookup(feature["properties"]["STATEFP"]).abbr.lower()
        state_meta = metadata.lookup(abbr=state)
        if ocd_id in OCD_FIXES:
            ocd_id = OCD_FIXES[ocd_id]

        if district_type == "cd":
            cd_num = feature["properties"]["CD116FP"]
            # "00" and "98" are both rendered as an at-large ("AL") seat
            if cd_num in ("00", "98"):
                cd_num = "AL"
            district_name = f"{state.upper()}-{cd_num}"
        else:
            district = state_meta.lookup_district(ocd_id)
            # Check for a missing district *before* touching its attributes;
            # otherwise a failed lookup dies with AttributeError and this
            # descriptive error is never reached.
            if not district:
                raise ValueError(f"no {ocd_id} {district_type}")
            district_name = district.name

        feature["properties"] = {
            "ocdid": ocd_id,
            "type": district_type,
            "state": state,
            "name": district_name,
        }

    if district_type == "cd":
        output_filename = f"data/geojson/us-{district_type}.geojson"
    else:
        output_filename = f"data/geojson/{state}-{district_type}.geojson"
    print(f"{geojson_path} => {output_filename}")
    with open(output_filename, "w") as geojson_file:
        json.dump(geojson, geojson_file)
def validate_jurisdictions(person, municipalities):
    """
    Check the ``jurisdiction`` of each of a person's roles.

    A jurisdiction is accepted when ``metadata.lookup`` resolves it or when
    it appears in ``municipalities``.  Roles without a jurisdiction are not
    reported.  Returns a list of error strings (empty when all is well).
    """
    problems = []
    for role in person.get("roles", []):
        jurisdiction = role.get("jurisdiction")
        try:
            known_state = metadata.lookup(jurisdiction_id=jurisdiction)
        except KeyError:
            known_state = None

        if not jurisdiction or known_state:
            continue
        if jurisdiction in municipalities:
            continue
        problems.append(f"{jurisdiction} is not a valid jurisdiction_id")
    return problems
def update_bill_fields_for_state(abbr):
    """
    Re-run ``update_bill_fields`` (with ``save=True``) on every bill whose
    legislative session belongs to the given state's jurisdiction.

    All updates happen inside a single ``transaction.atomic()`` block and a
    click progress bar is shown while iterating.
    """
    # function-scope imports, kept local exactly as before
    from ..data.models import Bill
    from ..importers.computed_fields import update_bill_fields

    state_meta = metadata.lookup(abbr=abbr)

    with transaction.atomic():
        state_bills = Bill.objects.filter(
            legislative_session__jurisdiction=state_meta.jurisdiction_id
        )
        progress_label = f"updating {abbr} bills"

        with click.progressbar(state_bills, label=progress_label) as progress:
            for bill in progress:
                update_bill_fields(bill, save=True)
def incoming_merge(abbr, existing_people, new_people, retirement):
    """
    Try to pair every incoming person with an existing one.

    An existing person is a candidate when the names are equal or when one
    of their roles equals the newcomer's first role in a single-seat
    district.  Candidates are offered to ``interactive_merge``; the first
    truthy result ends the search for that newcomer.

    Note that ``start_date`` is popped from the existing roles that are
    examined, so the ``existing_people`` dicts are modified in place.

    Returns a list of ``(new_person, role_matches)`` tuples for newcomers
    that were not merged, where ``role_matches`` holds the existing people
    that matched on role.
    """
    state = metadata.lookup(abbr=abbr)
    seats_for_district = {
        ("legislature" if chamber.chamber_type == "unicameral" else chamber.chamber_type): {
            district.name: district.num_seats for district in chamber.districts
        }
        for chamber in state.chambers
    }

    leftovers = []

    # find candidate(s) for each new person
    for incoming in new_people:
        same_seat_candidates = []

        for current in existing_people:
            same_name = incoming["name"] == current["name"]

            same_seat = False
            for role in current["roles"]:
                role.pop("start_date", None)
                seat_count = seats_for_district[role["type"]].get(role["district"], 1)
                if incoming["roles"][0] == role and seat_count == 1:
                    same_seat = True
                    break

            # only bother the user when there is some reason to think
            # these two are the same person
            if (same_name or same_seat) and interactive_merge(
                abbr, current, incoming, same_name, same_seat, retirement
            ):
                break

            # if we haven't matched and this was a role match, save this for later
            if same_seat:
                same_seat_candidates.append(current)
        else:
            # loop ran to completion without a merge: not matched
            leftovers.append((incoming, same_seat_candidates))

    return leftovers
def abbr_to_jid(abbr):
    """Return the ``jurisdiction_id`` of the state looked up by abbreviation."""
    state = openstates_metadata.lookup(abbr=abbr)
    return state.jurisdiction_id
def jid_to_abbr(jid):
    """Return the lower-cased abbreviation of the state with this jurisdiction ID."""
    state = openstates_metadata.lookup(jurisdiction_id=jid)
    abbreviation = state.abbr
    return abbreviation.lower()
def load_person(data):
    """
    Create or update the ``Person`` row described by ``data`` together with
    its related rows.

    ``data`` is a dict; ``id`` and ``name`` are required, every other key is
    read with a default.  The following sub-objects are synchronized through
    ``update_subobjects``: other names, links, sources, identifiers (``ids``
    plus ``other_identifiers``), contact details and memberships (one per
    party entry and one per legislative role).  Committee memberships are
    excluded from the comparison and therefore left alone.

    Finally the computed fields ``primary_party``,
    ``current_role_division_id`` and ``current_state`` are written, but the
    person is only saved again when at least one of them changed.

    Returns:
        ``(created, updated)`` -- the flags from ``get_update_or_create``,
        with ``updated`` OR-ed with the result of each sub-object sync.

    Raises:
        CancelTransaction: unknown party, organization or (non-legacy) post.
        ValueError: a role type other than upper/lower/legislature, or two
            active major parties.
        AssertionError: an active role's district is not in the metadata.
    """
    # import has to be here so that Django is set up
    from openstates.data.models import Person, Organization, Post

    fields = dict(
        id=data["id"],
        name=data["name"],
        given_name=data.get("given_name", ""),
        family_name=data.get("family_name", ""),
        gender=data.get("gender", ""),
        biography=data.get("biography", ""),
        birth_date=data.get("birth_date", ""),
        death_date=data.get("death_date", ""),
        image=data.get("image", ""),
        extras=data.get("extras", {}),
    )
    person, created, updated = get_update_or_create(Person, fields, ["id"])

    updated |= update_subobjects(person, "other_names", data.get("other_names", []))
    updated |= update_subobjects(person, "links", data.get("links", []))
    updated |= update_subobjects(person, "sources", data.get("sources", []))

    identifiers = [
        {"scheme": scheme, "identifier": value}
        for scheme, value in data.get("ids", {}).items()
    ]
    identifiers.extend(data.get("other_identifiers", []))
    updated |= update_subobjects(person, "identifiers", identifiers)

    # each populated field of a contact block becomes its own row
    contact_details = []
    for cd in data.get("contact_details", []):
        for contact_type in ("address", "email", "voice", "fax"):
            if cd.get(contact_type):
                contact_details.append(
                    {
                        "note": cd.get("note", ""),
                        "type": contact_type,
                        "value": cd[contact_type],
                    }
                )
    updated |= update_subobjects(person, "contact_details", contact_details)

    memberships = []
    primary_party = ""
    active_division_id = ""
    current_state = ""

    for party in data.get("party", []):
        party_name = party["name"]
        try:
            org = cached_lookup(Organization, classification="party", name=party_name)
        except Organization.DoesNotExist:
            click.secho(f"no such party {party['name']}", fg="red")
            raise CancelTransaction()
        memberships.append(
            {
                "organization": org,
                "start_date": party.get("start_date", ""),
                "end_date": party.get("end_date", ""),
            }
        )
        if role_is_active(party):
            if primary_party in MAJOR_PARTIES and party_name in MAJOR_PARTIES:
                raise ValueError(f"two primary parties for ({data['name']} {data['id']})")
            elif primary_party in MAJOR_PARTIES:
                # already set correct primary party, so do nothing
                pass
            else:
                primary_party = party_name

    for role in data.get("roles", []):
        if role["type"] not in ("upper", "lower", "legislature"):
            raise ValueError("unsupported role type")
        try:
            org = cached_lookup(
                Organization,
                classification=role["type"],
                jurisdiction_id=role["jurisdiction"],
            )
            post = org.posts.get(label=role["district"])
        except Organization.DoesNotExist:
            click.secho(
                f"{person} no such organization {role['jurisdiction']} {role['type']}",
                fg="red",
            )
            raise CancelTransaction()
        except Post.DoesNotExist:
            # if this is a legacy district, be quiet
            lds = legacy_districts(jurisdiction_id=role["jurisdiction"])
            # .get() rather than indexing: a chamber type that is absent from
            # the legacy mapping (e.g. "legislature") must be reported as a
            # missing post, not surface as a bare KeyError
            if role["district"] not in lds.get(role["type"], []):
                click.secho(f"no such post {role}", fg="red")
                raise CancelTransaction()
            else:
                post = None

        if role_is_active(role):
            state_metadata = metadata.lookup(jurisdiction_id=role["jurisdiction"])
            district = state_metadata.lookup_district(
                name=str(role["district"]), chamber=role["type"]
            )
            assert district
            active_division_id = district.division_id
            current_state = state_metadata.abbr.upper()
        elif not current_state:
            # set current_state to *something* -- since legislators
            # are only going to ever appear in one state this is fine
            # it may become necessary to make this smarter if legislators start
            # crossing state lines, but we don't have any examples of this
            state_metadata = metadata.lookup(jurisdiction_id=role["jurisdiction"])
            current_state = state_metadata.abbr.upper()

        memberships.append(
            {
                "organization": org,
                "post": post,
                "start_date": role.get("start_date", ""),
                "end_date": role.get("end_date", ""),
            }
        )

    # note that we don't manage committee memberships here
    updated |= update_subobjects(
        person,
        "memberships",
        memberships,
        read_manager=person.memberships.exclude(organization__classification="committee"),
    )

    # set computed fields (avoid extra save)
    if (
        person.current_role_division_id != active_division_id
        or person.primary_party != primary_party
        or person.current_state != current_state
    ):
        person.current_role_division_id = active_division_id
        person.current_state = current_state
        person.primary_party = primary_party
        person.save()

    return created, updated
def load_person(data):
    """
    Create or update the ``Person`` row described by ``data`` together with
    its related rows.

    ``data`` is a dict; ``id`` and ``name`` are required, every other key is
    read with a default.  The following sub-objects are synchronized through
    ``update_subobjects``: other names, links, sources, identifiers (``ids``
    plus ``other_identifiers``), contact details and memberships (one per
    party entry and one per role).  Committee memberships are excluded from
    the comparison and therefore left alone.

    Supported role types are ``mayor``, ``governor`` and the legislative
    ``upper`` / ``lower`` / ``legislature``.  Only legislative roles are tied
    to a district post; executive memberships carry a ``role`` title instead.

    Finally the computed fields ``primary_party``, ``current_role`` and
    ``current_jurisdiction_id`` are written, but the person is only saved
    again when at least one of them changed.

    Returns:
        ``(created, updated)`` -- the flags from ``get_update_or_create``,
        with ``updated`` OR-ed with the result of each sub-object sync.

    Raises:
        CancelTransaction: unknown party, organization or (non-legacy) post.
        ValueError: an unsupported role type, or two active major parties.
        AssertionError: an active role's district is not in the metadata.
    """
    # import has to be here so that Django is set up
    from openstates.data.models import Person, Organization, Post

    fields = dict(
        id=data["id"],
        name=data["name"],
        given_name=data.get("given_name", ""),
        family_name=data.get("family_name", ""),
        gender=data.get("gender", ""),
        biography=data.get("biography", ""),
        birth_date=data.get("birth_date", ""),
        death_date=data.get("death_date", ""),
        image=data.get("image", ""),
        extras=data.get("extras", {}),
    )
    person, created, updated = get_update_or_create(Person, fields, ["id"])

    updated |= update_subobjects(person, "other_names", data.get("other_names", []))
    updated |= update_subobjects(person, "links", data.get("links", []))
    updated |= update_subobjects(person, "sources", data.get("sources", []))

    identifiers = [
        {"scheme": scheme, "identifier": value}
        for scheme, value in data.get("ids", {}).items()
    ]
    identifiers.extend(data.get("other_identifiers", []))
    updated |= update_subobjects(person, "identifiers", identifiers)

    # each populated field of a contact block becomes its own row
    contact_details = []
    for cd in data.get("contact_details", []):
        for contact_type in ("address", "email", "voice", "fax"):
            if cd.get(contact_type):
                contact_details.append(
                    {
                        "note": cd.get("note", ""),
                        "type": contact_type,
                        "value": cd[contact_type],
                    }
                )
    updated |= update_subobjects(person, "contact_details", contact_details)

    memberships = []
    primary_party = ""
    current_jurisdiction_id = None
    current_role = None

    for party in data.get("party", []):
        party_name = party["name"]
        try:
            org = cached_lookup(Organization, classification="party", name=party_name)
        except Organization.DoesNotExist:
            click.secho(f"no such party {party['name']}", fg="red")
            raise CancelTransaction()
        memberships.append(
            {
                "organization": org,
                "start_date": party.get("start_date", ""),
                "end_date": party.get("end_date", ""),
            }
        )
        if role_is_active(party):
            if primary_party in MAJOR_PARTIES and party_name in MAJOR_PARTIES:
                raise ValueError(f"two primary parties for ({data['name']} {data['id']})")
            elif primary_party in MAJOR_PARTIES:
                # already set correct primary party, so do nothing
                pass
            else:
                primary_party = party_name

    for role in data.get("roles", []):
        # map the role type onto an organization classification, and decide
        # whether the role is attached to a district post
        if role["type"] in ("mayor",):
            role_name = "Mayor"
            org_type = "government"
            use_district = False
        elif role["type"] == "governor":
            role_name = "Governor"
            # the DC "governor" role is titled Mayor
            if role["jurisdiction"] == "ocd-jurisdiction/country:us/district:dc/government":
                role_name = "Mayor"
            org_type = "executive"
            use_district = False
        elif role["type"] in ("upper", "lower", "legislature"):
            org_type = role["type"]
            use_district = True
        else:
            raise ValueError("unsupported role type")

        try:
            org = cached_lookup(
                Organization, classification=org_type, jurisdiction_id=role["jurisdiction"]
            )
            if use_district:
                post = org.posts.get(label=role["district"])
            else:
                post = None
        except Organization.DoesNotExist:
            click.secho(
                f"{person} no such organization {role['jurisdiction']} {org_type}", fg="red"
            )
            raise CancelTransaction()
        except Post.DoesNotExist:
            # if this is a legacy district, be quiet
            lds = legacy_districts(jurisdiction_id=role["jurisdiction"])
            # .get() rather than indexing: a chamber type that is absent from
            # the legacy mapping (e.g. "legislature") must be reported as a
            # missing post, not surface as a bare KeyError
            if role["district"] not in lds.get(role["type"], []):
                click.secho(f"no such post {role}", fg="red")
                raise CancelTransaction()
            else:
                post = None

        if role_is_active(role):
            current_jurisdiction_id = role["jurisdiction"]

            current_role = {"org_classification": org_type, "district": None, "division_id": None}
            if use_district:
                state_metadata = metadata.lookup(jurisdiction_id=role["jurisdiction"])
                district = state_metadata.lookup_district(
                    name=str(role["district"]), chamber=role["type"]
                )
                assert district
                current_role["division_id"] = district.division_id
                current_role["title"] = getattr(state_metadata, role["type"]).title
                # try to force district to an int for sorting, but allow strings for non-numeric districts
                try:
                    current_role["district"] = int(role["district"])
                except ValueError:
                    current_role["district"] = str(role["district"])
            else:
                current_role["title"] = role_name
        elif not current_jurisdiction_id:
            # no active role seen so far: fall back to this role's jurisdiction
            current_jurisdiction_id = role["jurisdiction"]

        membership = {
            "organization": org,
            "post": post,
            "start_date": role.get("start_date", ""),
            "end_date": role.get("end_date", ""),
        }
        if not use_district:
            membership["role"] = role_name
        memberships.append(membership)

    # note that we don't manage committee memberships here
    updated |= update_subobjects(
        person,
        "memberships",
        memberships,
        read_manager=person.memberships.exclude(organization__classification="committee"),
    )

    # set computed fields (avoid extra save)
    if (
        person.primary_party != primary_party
        or person.current_role != current_role
        or person.current_jurisdiction_id != current_jurisdiction_id
    ):
        person.primary_party = primary_party
        person.current_role = current_role
        person.current_jurisdiction_id = current_jurisdiction_id
        person.save()

    return created, updated
def make_governors():
    """
    Read ``governors.csv`` from the working directory and write one person
    object per row into ``data/<state abbr>/executive/``.

    Each object gets a fresh ``ocd_uuid("person")`` id, a single party entry,
    a single ``governor`` role for the state's jurisdiction, one
    "Capitol Office" contact block (only the non-empty fields are included),
    an optional twitter id, and the website/webform as source and links.

    The output directory is created when missing; an already existing
    directory is reused.
    """
    # newline="" is what the csv module asks for, so that line breaks inside
    # quoted fields (the address column is multi-line) are parsed correctly
    with open("governors.csv", newline="") as f:
        for line in csv.DictReader(f):
            state = line["state"]
            name = line["name"]
            given_name = line["first_name"]
            family_name = line["last_name"]
            party = line["party"]
            birth_date = line["birth_date"]
            start_date = line["start_date"]
            end_date = line["end_date"]
            website = line["website"]
            twitter = line["twitter"]
            webform = line["webform"]

            # collapse the multi-line address into a single "; "-separated string
            full_address = "; ".join(part.strip() for part in line["address"].splitlines())
            phone = line["phone"]
            email = line["email"]
            fax = line["fax"]

            contact = {"note": "Capitol Office"}
            if full_address:
                contact["address"] = full_address
            if fax:
                contact["fax"] = reformat_phone_number(fax)
            if phone:
                contact["voice"] = reformat_phone_number(phone)
            if email:
                contact["email"] = email

            ids = {}
            if twitter:
                ids["twitter"] = twitter

            # one lookup is enough for both the jurisdiction id and the abbreviation
            state_meta = metadata.lookup(name=state)
            jid = state_meta.jurisdiction_id
            abbr = state_meta.abbr.lower()

            obj = OrderedDict(
                {
                    "id": ocd_uuid("person"),
                    "name": name,
                    "given_name": given_name,
                    "family_name": family_name,
                    "birth_date": birth_date,
                    "party": [{"name": party}],
                    "roles": [
                        {
                            "jurisdiction": jid,
                            "type": "governor",
                            "start_date": start_date,
                            "end_date": end_date,
                        }
                    ],
                    "contact_details": [contact],
                    "ids": ids,
                    "sources": [{"url": website}],
                    "links": [{"url": website}, {"url": webform, "note": "webform"}],
                }
            )

            outdir = f"data/{abbr}/executive/"
            # exist_ok: re-running the script (or an already populated state
            # directory) must not abort with FileExistsError
            os.makedirs(outdir, exist_ok=True)
            dump_obj(obj, output_dir=outdir)
def get_jurisdiction_id(abbr):
    """Look the state up by abbreviation and return its ``jurisdiction_id``."""
    state = metadata.lookup(abbr=abbr)
    return state.jurisdiction_id
def legacy_districts(**kwargs):
    """
    Group a state's legacy district names by chamber.

    The keyword arguments are handed straight to ``metadata.lookup`` (can
    take jurisdiction_id or abbr).  The returned dict always has the keys
    "upper" and "lower", each mapping to a list of district names.
    """
    names_by_chamber = dict(upper=[], lower=[])
    state = metadata.lookup(**kwargs)
    for legacy in state.legacy_districts:
        chamber_names = names_by_chamber[legacy.chamber_type]
        chamber_names.append(legacy.name)
    return names_by_chamber