Example #1
0
 def __init__(self, abbr, settings):
     """Initialise empty validation state for the jurisdiction ``abbr``.

     Args:
         abbr: abbreviation handed to ``get_expected_districts``,
             ``legacy_districts`` and ``load_municipalities``.
         settings: mapping providing ``"parties"`` (required) and
             ``"http_whitelist"`` (optional, defaults to empty); it is
             also passed to ``get_expected_districts``.

     Raises:
         ValueError: if a loaded municipality id does not match
             ``JURISDICTION_RE``.
     """

     def list_buckets():
         # inner factory for the two-level "key -> key -> list" mappings
         return defaultdict(list)

     # configuration pulled from settings
     whitelist = settings.get("http_whitelist", [])
     self.http_whitelist = tuple(whitelist)
     self.expected = get_expected_districts(settings, abbr)
     self.valid_parties = set(settings["parties"])

     # messages collected while validating
     self.errors, self.warnings = defaultdict(list), defaultdict(list)

     # plain integer tallies, all starting at zero
     self.person_count = self.retired_count = self.org_count = 0
     self.missing_person_id = self.missing_person_id_percent = 0

     # keyed tallies
     self.role_types, self.parent_types = defaultdict(int), defaultdict(int)
     self.person_mapping = {}
     (
         self.parties,
         self.contact_counts,
         self.id_counts,
         self.optional_fields,
         self.extra_counts,
     ) = (Counter() for _ in range(5))

     # role type -> district -> person
     self.active_legislators = defaultdict(list_buckets)
     # field name -> value -> person
     self.duplicate_values = defaultdict(list_buckets)

     self.legacy_districts = legacy_districts(abbr=abbr)

     municipality_ids = [entry["id"] for entry in load_municipalities(abbr=abbr)]
     self.municipalities = municipality_ids
     # stop at the first id that fails the pattern and report it
     first_invalid = next(
         (mid for mid in municipality_ids if not JURISDICTION_RE.match(mid)),
         None,
     )
     if first_invalid is not None:
         raise ValueError(f"invalid municipality id {first_invalid}")
Example #2
0
 def __init__(self, abbr, settings):
     """Initialise empty validation state for the jurisdiction ``abbr``.

     Args:
         abbr: abbreviation handed to ``get_expected_districts``,
             ``legacy_districts`` and ``load_municipalities``.
         settings: mapping providing ``"parties"`` (required) and
             ``"http_whitelist"`` (optional, defaults to empty); it is
             also passed to ``get_expected_districts``.

     Raises:
         ValueError: if a loaded municipality id does not match
             ``JURISDICTION_RE``.
     """

     def list_buckets():
         # inner factory for the two-level "key -> key -> list" mappings
         return defaultdict(list)

     # configuration pulled from settings
     whitelist = settings.get("http_whitelist", [])
     self.http_whitelist = tuple(whitelist)
     self.expected = get_expected_districts(settings, abbr)
     self.valid_parties = set(settings["parties"])

     # messages collected while validating
     self.errors, self.warnings = defaultdict(list), defaultdict(list)

     # role type -> district -> filename
     self.active_legislators = defaultdict(list_buckets)
     # field name -> value -> filename
     self.duplicate_values = defaultdict(list_buckets)

     self.legacy_districts = legacy_districts(abbr=abbr)

     municipality_ids = [entry["id"] for entry in load_municipalities(abbr=abbr)]
     self.municipalities = municipality_ids
     # stop at the first id that fails the pattern and report it
     first_invalid = next(
         (mid for mid in municipality_ids if not JURISDICTION_RE.match(mid)),
         None,
     )
     if first_invalid is not None:
         raise ValueError(f"invalid municipality id {first_invalid}")
Example #3
0
def load_person(data):
    """Create or update the ``Person`` record described by ``data``.

    Besides the person row itself this syncs other names, links, sources,
    identifiers, contact details and party/role memberships (committee
    memberships are excluded from the sync), then refreshes the computed
    ``current_role_division_id``, ``current_state`` and ``primary_party``
    fields, saving only when one of them changed.

    Args:
        data: mapping for one person. ``"id"`` and ``"name"`` are
            required; every other key is read with a default.

    Returns:
        ``(created, updated)``: ``created`` as reported by
        ``get_update_or_create`` and ``updated`` OR-ed with the result of
        every ``update_subobjects`` call. A save that only touches the
        computed fields is not reflected in ``updated``.

    Raises:
        ValueError: two active major parties, or a role whose type is not
            ``upper``/``lower``/``legislature``.
        AssertionError: an active role's district is not found by the
            metadata lookup.
        CancelTransaction: a party or organization does not exist, or a
            post does not exist and is not a legacy district.
    """
    # import has to be here so that Django is set up
    from openstates.data.models import Person, Organization, Post

    fields = dict(
        id=data["id"],
        name=data["name"],
        given_name=data.get("given_name", ""),
        family_name=data.get("family_name", ""),
        gender=data.get("gender", ""),
        biography=data.get("biography", ""),
        birth_date=data.get("birth_date", ""),
        death_date=data.get("death_date", ""),
        image=data.get("image", ""),
        extras=data.get("extras", {}),
    )
    person, created, updated = get_update_or_create(Person, fields, ["id"])

    updated |= update_subobjects(person, "other_names",
                                 data.get("other_names", []))
    updated |= update_subobjects(person, "links", data.get("links", []))
    updated |= update_subobjects(person, "sources", data.get("sources", []))

    # the "ids" mapping is flattened into identifier dicts, followed by any
    # already-structured "other_identifiers"
    identifiers = [
        {"scheme": scheme, "identifier": value}
        for scheme, value in data.get("ids", {}).items()
    ]
    identifiers.extend(data.get("other_identifiers", []))
    updated |= update_subobjects(person, "identifiers", identifiers)

    # each contact-detail entry may carry several kinds of value; emit one
    # record per non-empty kind
    contact_details = []
    for cd in data.get("contact_details", []):
        for contact_type in ("address", "email", "voice", "fax"):
            if cd.get(contact_type):
                contact_details.append({
                    "note": cd.get("note", ""),
                    "type": contact_type,
                    "value": cd[contact_type],
                })
    updated |= update_subobjects(person, "contact_details", contact_details)

    memberships = []
    primary_party = ""
    active_division_id = ""
    current_state = ""
    for party in data.get("party", []):
        party_name = party["name"]
        try:
            org = cached_lookup(Organization,
                                classification="party",
                                name=party_name)
        except Organization.DoesNotExist:
            click.secho(f"no such party {party_name}", fg="red")
            raise CancelTransaction()
        memberships.append({
            "organization": org,
            "start_date": party.get("start_date", ""),
            "end_date": party.get("end_date", ""),
        })
        if role_is_active(party):
            if primary_party in MAJOR_PARTIES and party_name in MAJOR_PARTIES:
                raise ValueError(
                    f"two primary parties for ({data['name']} {data['id']})")
            elif primary_party in MAJOR_PARTIES:
                # already set correct primary party, so do nothing
                pass
            else:
                primary_party = party_name
    for role in data.get("roles", []):
        if role["type"] not in ("upper", "lower", "legislature"):
            raise ValueError("unsupported role type")
        try:
            org = cached_lookup(
                Organization,
                classification=role["type"],
                jurisdiction_id=role["jurisdiction"],
            )
            post = org.posts.get(label=role["district"])
        except Organization.DoesNotExist:
            click.secho(
                f"{person} no such organization {role['jurisdiction']} {role['type']}",
                fg="red",
            )
            raise CancelTransaction()
        except Post.DoesNotExist:
            # if this is a legacy district, be quiet
            lds = legacy_districts(jurisdiction_id=role["jurisdiction"])
            if role["district"] not in lds[role["type"]]:
                click.secho(f"no such post {role}", fg="red")
                raise CancelTransaction()
            else:
                post = None
        if role_is_active(role):
            state_metadata = metadata.lookup(
                jurisdiction_id=role["jurisdiction"])
            district = state_metadata.lookup_district(
                name=str(role["district"]), chamber=role["type"])
            if not district:
                # explicit raise rather than ``assert`` so the check is not
                # stripped under ``python -O``; the exception type is the
                # one the assert used to raise
                raise AssertionError(
                    f"no district {role['district']!r} ({role['type']}) "
                    f"in {role['jurisdiction']}")
            active_division_id = district.division_id
            current_state = state_metadata.abbr.upper()
        elif not current_state:
            # set current_state to *something* -- since legislators
            # are only going to ever appear in one state this is fine
            # it may become necessary to make this smarter if legislators start
            # crossing state lines, but we don't have any examples of this
            state_metadata = metadata.lookup(
                jurisdiction_id=role["jurisdiction"])
            current_state = state_metadata.abbr.upper()
        memberships.append({
            "organization": org,
            "post": post,
            "start_date": role.get("start_date", ""),
            "end_date": role.get("end_date", ""),
        })

    # note that we don't manage committee memberships here
    updated |= update_subobjects(
        person,
        "memberships",
        memberships,
        read_manager=person.memberships.exclude(
            organization__classification="committee"),
    )

    # set computed fields (avoid extra save)
    if (person.current_role_division_id != active_division_id
            or person.primary_party != primary_party
            or person.current_state != current_state):
        person.current_role_division_id = active_division_id
        person.current_state = current_state
        person.primary_party = primary_party
        person.save()

    return created, updated
Example #4
0
def load_person(data):
    """Create or update the ``Person`` record described by ``data``.

    Besides the person row itself this syncs other names, links, sources,
    identifiers, contact details and party/role memberships (committee
    memberships are excluded from the sync), then refreshes the computed
    ``primary_party``, ``current_role`` and ``current_jurisdiction_id`` fields,
    saving only when one of them changed.

    Role handling by ``role["type"]``:
        * ``mayor`` -> organization classification ``government``, no post.
        * ``governor`` -> classification ``executive``, no post; titled
          "Mayor" for the DC jurisdiction id, "Governor" otherwise.
        * ``upper``/``lower``/``legislature`` -> classification equals the
          type and a post is looked up by district label.

    Args:
        data: mapping for one person. ``"id"`` and ``"name"`` are required;
            every other key is read with a default.

    Returns:
        ``(created, updated)``: ``created`` as reported by
        ``get_update_or_create`` and ``updated`` OR-ed with the result of every
        ``update_subobjects`` call. A save that only touches the computed
        fields is not reflected in ``updated``.

    Raises:
        ValueError: two active major parties, or an unsupported role type.
        AssertionError: an active district-based role's district is not found
            by the metadata lookup.
        CancelTransaction: a party or organization does not exist, or a post
            does not exist and is not a legacy district.
    """
    # import has to be here so that Django is set up
    from openstates.data.models import Person, Organization, Post

    fields = dict(
        id=data["id"],
        name=data["name"],
        given_name=data.get("given_name", ""),
        family_name=data.get("family_name", ""),
        gender=data.get("gender", ""),
        biography=data.get("biography", ""),
        birth_date=data.get("birth_date", ""),
        death_date=data.get("death_date", ""),
        image=data.get("image", ""),
        extras=data.get("extras", {}),
    )
    person, created, updated = get_update_or_create(Person, fields, ["id"])

    updated |= update_subobjects(person, "other_names", data.get("other_names", []))
    updated |= update_subobjects(person, "links", data.get("links", []))
    updated |= update_subobjects(person, "sources", data.get("sources", []))

    # the "ids" mapping is flattened into identifier dicts, followed by any
    # already-structured "other_identifiers"
    identifiers = [
        {"scheme": scheme, "identifier": value}
        for scheme, value in data.get("ids", {}).items()
    ]
    identifiers.extend(data.get("other_identifiers", []))
    updated |= update_subobjects(person, "identifiers", identifiers)

    # each contact-detail entry may carry several kinds of value; emit one
    # record per non-empty kind
    contact_details = []
    for cd in data.get("contact_details", []):
        for contact_type in ("address", "email", "voice", "fax"):
            if cd.get(contact_type):
                contact_details.append(
                    {"note": cd.get("note", ""), "type": contact_type, "value": cd[contact_type]}
                )
    updated |= update_subobjects(person, "contact_details", contact_details)

    memberships = []
    primary_party = ""
    current_jurisdiction_id = None
    current_role = None
    for party in data.get("party", []):
        party_name = party["name"]
        try:
            org = cached_lookup(Organization, classification="party", name=party_name)
        except Organization.DoesNotExist:
            click.secho(f"no such party {party_name}", fg="red")
            raise CancelTransaction()
        memberships.append(
            {
                "organization": org,
                "start_date": party.get("start_date", ""),
                "end_date": party.get("end_date", ""),
            }
        )
        if role_is_active(party):
            if primary_party in MAJOR_PARTIES and party_name in MAJOR_PARTIES:
                raise ValueError(f"two primary parties for ({data['name']} {data['id']})")
            elif primary_party in MAJOR_PARTIES:
                # already set correct primary party, so do nothing
                pass
            else:
                primary_party = party_name
    for role in data.get("roles", []):
        # role_name is only assigned (and only read) for roles without a district
        if role["type"] in ("mayor",):
            role_name = "Mayor"
            org_type = "government"
            use_district = False
        elif role["type"] == "governor":
            role_name = "Governor"
            if role["jurisdiction"] == "ocd-jurisdiction/country:us/district:dc/government":
                role_name = "Mayor"
            org_type = "executive"
            use_district = False
        elif role["type"] in ("upper", "lower", "legislature"):
            org_type = role["type"]
            use_district = True
        else:
            raise ValueError("unsupported role type")
        try:
            org = cached_lookup(
                Organization, classification=org_type, jurisdiction_id=role["jurisdiction"]
            )
            if use_district:
                post = org.posts.get(label=role["district"])
            else:
                post = None
        except Organization.DoesNotExist:
            click.secho(
                f"{person} no such organization {role['jurisdiction']} {org_type}", fg="red"
            )
            raise CancelTransaction()
        except Post.DoesNotExist:
            # if this is a legacy district, be quiet
            lds = legacy_districts(jurisdiction_id=role["jurisdiction"])
            if role["district"] not in lds[role["type"]]:
                click.secho(f"no such post {role}", fg="red")
                raise CancelTransaction()
            else:
                post = None

        if role_is_active(role):
            current_jurisdiction_id = role["jurisdiction"]

            current_role = {"org_classification": org_type, "district": None, "division_id": None}
            if use_district:
                state_metadata = metadata.lookup(jurisdiction_id=role["jurisdiction"])
                district = state_metadata.lookup_district(
                    name=str(role["district"]), chamber=role["type"]
                )
                if not district:
                    # explicit raise rather than ``assert`` so the check is not stripped
                    # under ``python -O``; the exception type is the one the assert raised
                    raise AssertionError(
                        f"no district {role['district']!r} ({role['type']}) "
                        f"in {role['jurisdiction']}"
                    )
                current_role["division_id"] = district.division_id
                current_role["title"] = getattr(state_metadata, role["type"]).title
                # try to force district to an int for sorting, but allow strings for non-numeric districts
                try:
                    current_role["district"] = int(role["district"])
                except ValueError:
                    current_role["district"] = str(role["district"])
            else:
                current_role["title"] = role_name
        elif not current_jurisdiction_id:
            # no active role seen yet: fall back to this role's jurisdiction
            current_jurisdiction_id = role["jurisdiction"]

        membership = {
            "organization": org,
            "post": post,
            "start_date": role.get("start_date", ""),
            "end_date": role.get("end_date", ""),
        }
        if not use_district:
            membership["role"] = role_name
        memberships.append(membership)

    # note that we don't manage committee memberships here
    updated |= update_subobjects(
        person,
        "memberships",
        memberships,
        read_manager=person.memberships.exclude(organization__classification="committee"),
    )

    # set computed fields (avoid extra save)
    if (
        person.primary_party != primary_party
        or person.current_role != current_role
        or person.current_jurisdiction_id != current_jurisdiction_id
    ):
        person.primary_party = primary_party
        person.current_role = current_role
        person.current_jurisdiction_id = current_jurisdiction_id
        person.save()

    return created, updated