def __init__(self, abbr, settings):
    """Prepare empty bookkeeping state for the jurisdiction ``abbr``.

    ``settings`` must contain a ``"parties"`` entry and may contain an
    ``"http_whitelist"`` entry (treated as empty when absent).

    Raises:
        ValueError: if any loaded municipality id does not match
            ``JURISDICTION_RE``.
    """

    def _list_buckets():
        # inner level of the two-level lookups: key -> list
        return defaultdict(list)

    # values derived from settings
    whitelist = settings.get("http_whitelist", [])
    self.http_whitelist = tuple(whitelist)
    self.expected = get_expected_districts(settings, abbr)
    self.valid_parties = set(settings["parties"])

    # collected problems; any missing key starts out as an empty list
    self.errors = defaultdict(list)
    self.warnings = defaultdict(list)

    # plain integer tallies, all starting at zero
    self.person_count = self.retired_count = self.org_count = 0
    self.missing_person_id = self.missing_person_id_percent = 0
    self.role_types = defaultdict(int)
    self.parent_types = defaultdict(int)

    self.person_mapping = {}

    # frequency counters (each one is its own Counter instance)
    self.parties = Counter()
    self.contact_counts = Counter()
    self.id_counts = Counter()
    self.optional_fields = Counter()
    self.extra_counts = Counter()

    # role type -> district -> person
    self.active_legislators = defaultdict(_list_buckets)
    # field name -> value -> person
    self.duplicate_values = defaultdict(_list_buckets)

    self.legacy_districts = legacy_districts(abbr=abbr)

    # load municipality ids, then reject the first one that is malformed
    self.municipalities = [
        municipality["id"] for municipality in load_municipalities(abbr=abbr)
    ]
    malformed = next(
        (
            municipality_id
            for municipality_id in self.municipalities
            if not JURISDICTION_RE.match(municipality_id)
        ),
        None,
    )
    if malformed is not None:
        raise ValueError(f"invalid municipality id {malformed}")
def __init__(self, abbr, settings):
    """Prepare empty bookkeeping state for the jurisdiction ``abbr``.

    ``settings`` must contain a ``"parties"`` entry and may contain an
    ``"http_whitelist"`` entry (treated as empty when absent).

    Raises:
        ValueError: if any loaded municipality id does not match
            ``JURISDICTION_RE``.
    """

    def _list_buckets():
        # inner level of the two-level lookups: key -> list
        return defaultdict(list)

    # values derived from settings
    whitelist = settings.get("http_whitelist", [])
    self.http_whitelist = tuple(whitelist)
    self.expected = get_expected_districts(settings, abbr)
    self.valid_parties = set(settings["parties"])

    # collected problems; any missing key starts out as an empty list
    self.errors = defaultdict(list)
    self.warnings = defaultdict(list)

    # role type -> district -> filename
    self.active_legislators = defaultdict(_list_buckets)
    # field name -> value -> filename
    self.duplicate_values = defaultdict(_list_buckets)

    self.legacy_districts = legacy_districts(abbr=abbr)

    # load municipality ids, then reject the first one that is malformed
    self.municipalities = [
        municipality["id"] for municipality in load_municipalities(abbr=abbr)
    ]
    malformed = next(
        (
            municipality_id
            for municipality_id in self.municipalities
            if not JURISDICTION_RE.match(municipality_id)
        ),
        None,
    )
    if malformed is not None:
        raise ValueError(f"invalid municipality id {malformed}")
def load_person(data):
    """Create or update the ``Person`` described by ``data`` and sync its sub-objects.

    ``data`` is a mapping that must contain ``"id"`` and ``"name"``.  The
    optional keys read here are ``given_name``, ``family_name``, ``gender``,
    ``biography``, ``birth_date``, ``death_date``, ``image``, ``extras``,
    ``other_names``, ``links``, ``sources``, ``ids``, ``other_identifiers``,
    ``contact_details``, ``party`` and ``roles``.

    Party entries and roles are both turned into memberships.  Committee
    memberships are excluded from the set being synced.  After syncing, the
    computed fields ``current_role_division_id``, ``current_state`` and
    ``primary_party`` are written back if any of them changed.

    Returns:
        A ``(created, updated)`` pair.  ``created`` comes from
        ``get_update_or_create``; ``updated`` is that call's flag OR-ed with
        the result of every ``update_subobjects`` call.  The final save of
        the computed fields does not affect ``updated``.

    Raises:
        ValueError: if two active parties are both in ``MAJOR_PARTIES``, or a
            role has a type other than ``upper``/``lower``/``legislature``.
        CancelTransaction: if a party or organization cannot be found, or a
            post cannot be found and the district is not a legacy district.
        AssertionError: if the district of an active role cannot be looked up.
    """
    # import has to be here so that Django is set up
    from openstates.data.models import Person, Organization, Post

    fields = dict(
        id=data["id"],
        name=data["name"],
        given_name=data.get("given_name", ""),
        family_name=data.get("family_name", ""),
        gender=data.get("gender", ""),
        biography=data.get("biography", ""),
        birth_date=data.get("birth_date", ""),
        death_date=data.get("death_date", ""),
        image=data.get("image", ""),
        extras=data.get("extras", {}),
    )
    person, created, updated = get_update_or_create(Person, fields, ["id"])

    updated |= update_subobjects(person, "other_names", data.get("other_names", []))
    updated |= update_subobjects(person, "links", data.get("links", []))
    updated |= update_subobjects(person, "sources", data.get("sources", []))

    # the "ids" mapping is expanded into identifier dicts, followed by any
    # already-expanded "other_identifiers" entries
    identifiers = [
        {"scheme": scheme, "identifier": value}
        for scheme, value in data.get("ids", {}).items()
    ]
    identifiers.extend(data.get("other_identifiers", []))
    updated |= update_subobjects(person, "identifiers", identifiers)

    # one contact detail per non-empty field of each contact_details entry
    contact_details = []
    for cd in data.get("contact_details", []):
        for contact_type in ("address", "email", "voice", "fax"):
            if cd.get(contact_type):
                contact_details.append(
                    {
                        "note": cd.get("note", ""),
                        "type": contact_type,
                        "value": cd[contact_type],
                    }
                )
    updated |= update_subobjects(person, "contact_details", contact_details)

    memberships = []
    primary_party = ""
    active_division_id = ""
    current_state = ""

    for party in data.get("party", []):
        party_name = party["name"]
        try:
            org = cached_lookup(Organization, classification="party", name=party_name)
        except Organization.DoesNotExist:
            click.secho(f"no such party {party_name}", fg="red")
            raise CancelTransaction()
        memberships.append(
            {
                "organization": org,
                "start_date": party.get("start_date", ""),
                "end_date": party.get("end_date", ""),
            }
        )
        if role_is_active(party):
            if primary_party in MAJOR_PARTIES:
                if party_name in MAJOR_PARTIES:
                    raise ValueError(
                        f"two primary parties for ({data['name']} {data['id']})"
                    )
                # already set correct primary party, so do nothing
            else:
                primary_party = party_name

    for role in data.get("roles", []):
        role_type = role["type"]
        if role_type not in ("upper", "lower", "legislature"):
            raise ValueError("unsupported role type")
        jurisdiction_id = role["jurisdiction"]

        try:
            org = cached_lookup(
                Organization,
                classification=role_type,
                jurisdiction_id=jurisdiction_id,
            )
            post = org.posts.get(label=role["district"])
        except Organization.DoesNotExist:
            click.secho(
                f"{person} no such organization {jurisdiction_id} {role_type}",
                fg="red",
            )
            raise CancelTransaction()
        except Post.DoesNotExist:
            # if this is a legacy district, be quiet
            lds = legacy_districts(jurisdiction_id=jurisdiction_id)
            if role["district"] not in lds[role_type]:
                click.secho(f"no such post {role}", fg="red")
                raise CancelTransaction()
            post = None

        if role_is_active(role):
            state_metadata = metadata.lookup(jurisdiction_id=jurisdiction_id)
            district = state_metadata.lookup_district(
                name=str(role["district"]), chamber=role_type
            )
            if not district:
                # explicit raise rather than ``assert`` so that the check is
                # not stripped when Python runs with -O
                raise AssertionError(
                    f"district lookup failed for {role['district']} "
                    f"({role_type}) in {jurisdiction_id}"
                )
            active_division_id = district.division_id
            current_state = state_metadata.abbr.upper()
        elif not current_state:
            # set current_state to *something* -- since legislators
            # are only going to ever appear in one state this is fine
            # it may become necessary to make this smarter if legislators start
            # crossing state lines, but we don't have any examples of this
            state_metadata = metadata.lookup(jurisdiction_id=jurisdiction_id)
            current_state = state_metadata.abbr.upper()

        memberships.append(
            {
                "organization": org,
                "post": post,
                "start_date": role.get("start_date", ""),
                "end_date": role.get("end_date", ""),
            }
        )

    # note that we don't manage committee memberships here
    updated |= update_subobjects(
        person,
        "memberships",
        memberships,
        read_manager=person.memberships.exclude(
            organization__classification="committee"
        ),
    )

    # set computed fields (avoid extra save)
    computed_changed = (
        person.current_role_division_id != active_division_id
        or person.primary_party != primary_party
        or person.current_state != current_state
    )
    if computed_changed:
        person.current_role_division_id = active_division_id
        person.current_state = current_state
        person.primary_party = primary_party
        person.save()

    return created, updated
def load_person(data):
    """Create or update the ``Person`` described by ``data`` and sync its sub-objects.

    ``data`` is a mapping that must contain ``"id"`` and ``"name"``.  The
    optional keys read here are ``given_name``, ``family_name``, ``gender``,
    ``biography``, ``birth_date``, ``death_date``, ``image``, ``extras``,
    ``other_names``, ``links``, ``sources``, ``ids``, ``other_identifiers``,
    ``contact_details``, ``party`` and ``roles``.

    Party entries and roles are both turned into memberships.  Supported
    role types are ``mayor``, ``governor``, ``upper``, ``lower`` and
    ``legislature``; only the last three are tied to a district/post.
    Committee memberships are excluded from the set being synced.  After
    syncing, the computed fields ``primary_party``, ``current_role`` and
    ``current_jurisdiction_id`` are written back if any of them changed.

    Returns:
        A ``(created, updated)`` pair.  ``created`` comes from
        ``get_update_or_create``; ``updated`` is that call's flag OR-ed with
        the result of every ``update_subobjects`` call.  The final save of
        the computed fields does not affect ``updated``.

    Raises:
        ValueError: if two active parties are both in ``MAJOR_PARTIES``, or a
            role has an unsupported type.
        CancelTransaction: if a party or organization cannot be found, or a
            post cannot be found and the district is not a legacy district.
        AssertionError: if the district of an active district-based role
            cannot be looked up.
    """
    # import has to be here so that Django is set up
    from openstates.data.models import Person, Organization, Post

    fields = dict(
        id=data["id"],
        name=data["name"],
        given_name=data.get("given_name", ""),
        family_name=data.get("family_name", ""),
        gender=data.get("gender", ""),
        biography=data.get("biography", ""),
        birth_date=data.get("birth_date", ""),
        death_date=data.get("death_date", ""),
        image=data.get("image", ""),
        extras=data.get("extras", {}),
    )
    person, created, updated = get_update_or_create(Person, fields, ["id"])

    updated |= update_subobjects(person, "other_names", data.get("other_names", []))
    updated |= update_subobjects(person, "links", data.get("links", []))
    updated |= update_subobjects(person, "sources", data.get("sources", []))

    # the "ids" mapping is expanded into identifier dicts, followed by any
    # already-expanded "other_identifiers" entries
    identifiers = [
        {"scheme": scheme, "identifier": value}
        for scheme, value in data.get("ids", {}).items()
    ]
    identifiers.extend(data.get("other_identifiers", []))
    updated |= update_subobjects(person, "identifiers", identifiers)

    # one contact detail per non-empty field of each contact_details entry
    contact_details = []
    for cd in data.get("contact_details", []):
        for contact_type in ("address", "email", "voice", "fax"):
            if cd.get(contact_type):
                contact_details.append(
                    {
                        "note": cd.get("note", ""),
                        "type": contact_type,
                        "value": cd[contact_type],
                    }
                )
    updated |= update_subobjects(person, "contact_details", contact_details)

    memberships = []
    primary_party = ""
    current_jurisdiction_id = None
    current_role = None

    for party in data.get("party", []):
        party_name = party["name"]
        try:
            org = cached_lookup(Organization, classification="party", name=party_name)
        except Organization.DoesNotExist:
            click.secho(f"no such party {party_name}", fg="red")
            raise CancelTransaction()
        memberships.append(
            {
                "organization": org,
                "start_date": party.get("start_date", ""),
                "end_date": party.get("end_date", ""),
            }
        )
        if role_is_active(party):
            if primary_party in MAJOR_PARTIES:
                if party_name in MAJOR_PARTIES:
                    raise ValueError(
                        f"two primary parties for ({data['name']} {data['id']})"
                    )
                # already set correct primary party, so do nothing
            else:
                primary_party = party_name

    for role in data.get("roles", []):
        role_type = role["type"]

        # map the role type onto an organization classification; only
        # legislative roles are attached to a district
        if role_type in ("mayor",):
            role_name = "Mayor"
            org_type = "government"
            use_district = False
        elif role_type == "governor":
            role_name = "Governor"
            # the governor-type role of the DC jurisdiction is labeled "Mayor"
            if role["jurisdiction"] == "ocd-jurisdiction/country:us/district:dc/government":
                role_name = "Mayor"
            org_type = "executive"
            use_district = False
        elif role_type in ("upper", "lower", "legislature"):
            org_type = role_type
            use_district = True
        else:
            raise ValueError("unsupported role type")

        try:
            org = cached_lookup(
                Organization,
                classification=org_type,
                jurisdiction_id=role["jurisdiction"],
            )
            post = org.posts.get(label=role["district"]) if use_district else None
        except Organization.DoesNotExist:
            click.secho(
                f"{person} no such organization {role['jurisdiction']} {org_type}",
                fg="red",
            )
            raise CancelTransaction()
        except Post.DoesNotExist:
            # if this is a legacy district, be quiet
            lds = legacy_districts(jurisdiction_id=role["jurisdiction"])
            if role["district"] not in lds[role_type]:
                click.secho(f"no such post {role}", fg="red")
                raise CancelTransaction()
            post = None

        if role_is_active(role):
            current_jurisdiction_id = role["jurisdiction"]
            current_role = {
                "org_classification": org_type,
                "district": None,
                "division_id": None,
            }
            if use_district:
                state_metadata = metadata.lookup(jurisdiction_id=role["jurisdiction"])
                district = state_metadata.lookup_district(
                    name=str(role["district"]), chamber=role_type
                )
                if not district:
                    # explicit raise rather than ``assert`` so that the check
                    # is not stripped when Python runs with -O
                    raise AssertionError(
                        f"district lookup failed for {role['district']} "
                        f"({role_type}) in {role['jurisdiction']}"
                    )
                current_role["division_id"] = district.division_id
                current_role["title"] = getattr(state_metadata, role_type).title
                # try to force district to an int for sorting, but allow
                # strings for non-numeric districts
                try:
                    current_role["district"] = int(role["district"])
                except ValueError:
                    current_role["district"] = str(role["district"])
            else:
                current_role["title"] = role_name
        elif not current_jurisdiction_id:
            # no active role seen so far: fall back to this role's jurisdiction
            current_jurisdiction_id = role["jurisdiction"]

        membership = {
            "organization": org,
            "post": post,
            "start_date": role.get("start_date", ""),
            "end_date": role.get("end_date", ""),
        }
        if not use_district:
            membership["role"] = role_name
        memberships.append(membership)

    # note that we don't manage committee memberships here
    updated |= update_subobjects(
        person,
        "memberships",
        memberships,
        read_manager=person.memberships.exclude(organization__classification="committee"),
    )

    # set computed fields (avoid extra save)
    computed_changed = (
        person.primary_party != primary_party
        or person.current_role != current_role
        or person.current_jurisdiction_id != current_jurisdiction_id
    )
    if computed_changed:
        person.primary_party = primary_party
        person.current_role = current_role
        person.current_jurisdiction_id = current_jurisdiction_id
        person.save()

    return created, updated