コード例 #1
0
ファイル: common.py プロジェクト: tophers42/people
    def to_dict(self):
        """Serialize this person into an ordered mapping of person fields.

        The result always carries ``id``, ``name``, ``party``, ``roles``,
        ``links``, ``sources`` and ``contact_details``.  ``given_name``,
        ``family_name`` and ``image`` are added only when the corresponding
        attribute is truthy.  A fresh ``ocd-person/<uuid4>`` id is generated
        on every call.
        """
        # Look the lower-cased party up in PARTIES; fall back to the raw
        # value when there is no entry for it.
        party_name = PARTIES.get(self.party.lower(), self.party)

        record = OrderedDict()
        record["id"] = f"ocd-person/{uuid.uuid4()}"
        record["name"] = self.name
        record["party"] = [{"name": party_name}]
        record["roles"] = [
            {
                "district": self.district,
                "type": self.chamber,
                "jurisdiction": get_jurisdiction_id(self.state),
            }
        ]
        record["links"] = self.links
        record["sources"] = self.sources

        # Optional name parts and image are emitted only when set.
        for optional in ("given_name", "family_name", "image"):
            value = getattr(self, optional)
            if value:
                record[optional] = value

        # contact details: district office first, then capitol office; an
        # office is listed only when its own to_dict() result is truthy.
        offices = []
        record["contact_details"] = offices
        for office_attr in ("district_office", "capitol_office"):
            if getattr(self, office_attr).to_dict():
                offices.append(getattr(self, office_attr).to_dict())

        return record
コード例 #2
0
ファイル: new_person.py プロジェクト: markflaherty/people
def create_person(fname, lname, name, state, district, party, rtype, url,
                  image, start_date):
    """Build a new person record and dump it into the state's ``people`` directory.

    ``name`` is used as the display name when truthy; otherwise it is built
    as ``"<fname> <lname>"``.  ``url`` is recorded both as the only link and
    as the only source.  The finished record is handed to ``dump_obj``.
    """
    person = OrderedDict()
    person["id"] = ocd_uuid("person")
    person["name"] = name or f"{fname} {lname}"
    person["given_name"] = fname
    person["family_name"] = lname
    person["image"] = image
    person["party"] = [{"name": party}]
    person["roles"] = [
        {
            "type": rtype,
            "district": district,
            "jurisdiction": get_jurisdiction_id(state),
            "start_date": start_date,
        }
    ]
    person["links"] = [{"url": url}]
    person["sources"] = [{"url": url}]

    people_dir = os.path.join(get_data_dir(state), "people")
    dump_obj(person, output_dir=people_dir)
コード例 #3
0
def to_yaml(input_dir):
    """
    Convert scraped JSON in INPUT_DIR to YAML files for this repo.

    The state abbreviation is the last non-empty "/"-separated piece of
    INPUT_DIR.  Data is put into the incoming/ directory for usage with
    merge.py's --incoming option; *.yml files already present in its
    legislature/ subdirectory are removed first.
    """
    # last non-empty path piece is the abbreviation (None when there is none)
    abbr = next((part for part in reversed(input_dir.split("/")) if part), None)

    data_dir = get_data_dir(abbr)
    jurisdiction_id = get_jurisdiction_id(abbr)

    # NOTE(review): str.replace swaps every "data" substring in the path, not
    # just one directory name -- confirm that is safe for get_data_dir's output.
    output_dir = data_dir.replace("data", "incoming")
    assert "incoming" in output_dir

    legislature_dir = os.path.join(output_dir, "legislature")
    try:
        os.makedirs(legislature_dir)
    except FileExistsError:
        # the directory is already there: clear out the YAML files it holds
        for stale in glob.glob(os.path.join(legislature_dir, "*.yml")):
            os.remove(stale)
    process_dir(input_dir, output_dir, jurisdiction_id)
コード例 #4
0
ファイル: to_csv.py プロジェクト: jason44406/people
def to_csv(abbreviations, upload):
    """
    Write a CSV of legislature people for each state, optionally uploading it.

    For every abbreviation (all of them when none are given) the sorted
    legislature/*.yml files are passed to write_csv with the output name
    "<abbr>.csv".  When UPLOAD is truthy that file is also sent to the
    data.openstates.org S3 bucket under people/current/ as a public-read
    text/csv object.
    """
    abbreviations = abbreviations or get_all_abbreviations()

    # the S3 client is only needed (and only created) when uploading
    s3 = boto3.client("s3") if upload else None

    for abbr in abbreviations:
        click.secho(f"==== {abbr} ====", bold=True)
        yaml_pattern = os.path.join(get_data_dir(abbr), "legislature/*.yml")
        jurisdiction_id = get_jurisdiction_id(abbr)
        csv_name = f"{abbr}.csv"
        write_csv(sorted(glob.glob(yaml_pattern)), jurisdiction_id, csv_name)

        if not upload:
            continue

        extra_args = {"ContentType": "text/csv", "ACL": "public-read"}
        s3.upload_file(
            csv_name,
            "data.openstates.org",
            f"people/current/{abbr}.csv",
            ExtraArgs=extra_args,
        )
        click.secho(
            f"uploaded to data.openstates.org/people/current/{abbr}.csv",
            fg="green",
        )
コード例 #5
0
def archive_leg_to_csv(state_abbr=None):
    """Write the unmatched voter / sponsor names of a state to a CSV file.

    The names come from ``get_unmatched`` for the state's jurisdiction id.
    Each item it yields is read through its ``"name"``, ``"n"`` and
    ``"session"`` keys.  The output file ``unmatched_<state_abbr>.csv`` is
    created in the current working directory with one row per name (sorted)
    and the columns ``name``, ``jurisdiction``, ``sessions``, ``votes`` and
    ``sponsorships``.

    Args:
        state_abbr: state abbreviation; used for the file name, the
            jurisdiction lookup and the ``jurisdiction`` column.
    """
    output_filename = f"unmatched_{state_abbr}.csv"

    jurisdiction_id = get_jurisdiction_id(state_abbr)

    # name -> summed count, and name -> set of sessions the name showed up in
    missing_votes = Counter()
    missing_sponsors = Counter()
    sessions_for_name = defaultdict(set)

    voters, bill_sponsors = get_unmatched(jurisdiction_id)

    for voter in voters:
        missing_votes[voter["name"]] += voter["n"]
        sessions_for_name[voter["name"]].add(voter["session"])

    for bill_sponsor in bill_sponsors:
        missing_sponsors[bill_sponsor["name"]] += bill_sponsor["n"]
        sessions_for_name[bill_sponsor["name"]].add(bill_sponsor["session"])

    all_names = sorted(sessions_for_name)

    # newline="" lets the csv module control line endings itself; without it
    # rows can end up with an extra "\r" on platforms that translate "\n".
    with open(output_filename, "w", newline="") as outf:
        out = csv.DictWriter(outf, ("name", "jurisdiction", "sessions", "votes", "sponsorships"))
        out.writeheader()
        for name in all_names:
            out.writerow(
                {
                    "name": name,
                    "jurisdiction": state_abbr,
                    "sessions": "; ".join(sorted(sessions_for_name[name])),
                    # Counter returns 0 for names seen only in the other list
                    "votes": missing_votes[name],
                    "sponsorships": missing_sponsors[name],
                }
            )
コード例 #6
0
ファイル: to_yaml.py プロジェクト: tdcjreform/people
def to_yaml(input_dir):
    """
    Convert pupa scraped JSON in INPUT_DIR to YAML files for this repo.

    The state abbreviation is the last non-empty '/'-separated piece of
    INPUT_DIR.  Data is put into the incoming/ directory for usage with
    merge.py's --incoming option; *.yml files already present in its people/
    and organizations/ subdirectories are removed first.
    """
    # last non-empty path piece is the abbreviation (None when there is none)
    abbr = next((part for part in reversed(input_dir.split('/')) if part), None)

    data_dir = get_data_dir(abbr)
    jurisdiction_id = get_jurisdiction_id(abbr)

    # NOTE(review): str.replace swaps every 'data' substring in the path, not
    # just one directory name -- confirm that is safe for get_data_dir's output.
    output_dir = data_dir.replace('data', 'incoming')
    assert 'incoming' in output_dir

    for subdir in ('people', 'organizations'):
        target = os.path.join(output_dir, subdir)
        try:
            os.makedirs(target)
        except FileExistsError:
            # the directory is already there: clear out the YAML files it holds
            for stale in glob.glob(os.path.join(target, '*.yml')):
                os.remove(stale)
    process_dir(input_dir, output_dir, jurisdiction_id)
コード例 #7
0
ファイル: to_database.py プロジェクト: tdcjreform/people
def to_database(abbreviations, purge, safe):
    """
    Sync YAML files to DB.

    Runs for every given abbreviation (all of them when none are given).
    Each state's posts, people (people/ and retired/) and organizations are
    loaded inside a single transaction.  With SAFE set, CancelTransaction is
    raised at the end of that transaction block and swallowed afterwards.
    """
    init_django()

    abbreviations = abbreviations or get_all_abbreviations()
    settings = get_settings()

    for abbr in abbreviations:
        click.secho(f'==== {abbr} ====', bold=True)
        directory = get_data_dir(abbr)
        jurisdiction_id = get_jurisdiction_id(abbr)

        # current people first, then retired ones
        person_files = []
        for pattern in ('people/*.yml', 'retired/*.yml'):
            person_files += glob.glob(os.path.join(directory, pattern))
        committee_files = glob.glob(os.path.join(directory, 'organizations/*.yml'))

        if safe:
            click.secho('running in safe mode, no changes will be made', fg='magenta')

        state_settings = settings[abbr]

        try:
            with transaction.atomic():
                create_posts(jurisdiction_id, state_settings)
                for files, kind in ((person_files, 'person'),
                                    (committee_files, 'organization')):
                    load_directory(files, kind, jurisdiction_id, purge=purge)
                if safe:
                    click.secho('ran in safe mode, no changes were made', fg='magenta')
                    # raising inside the atomic block aborts the transaction
                    raise CancelTransaction()
        except CancelTransaction:
            # expected in safe mode; carry on with the next abbreviation
            pass
コード例 #8
0
ファイル: to_csv.py プロジェクト: tdcjreform/people
def to_csv(abbreviations):
    """
    Write a CSV of people for each state.

    For every abbreviation (all of them when none are given) the sorted
    people/*.yml files are passed to write_csv with the output path
    csv/<abbr>_legislators.csv.
    """
    for abbr in abbreviations or get_all_abbreviations():
        click.secho(f'==== {abbr} ====', bold=True)
        yaml_pattern = os.path.join(get_data_dir(abbr), 'people/*.yml')
        jurisdiction_id = get_jurisdiction_id(abbr)
        person_files = sorted(glob.glob(yaml_pattern))
        csv_path = f"csv/{abbr}_legislators.csv"
        write_csv(person_files, jurisdiction_id, csv_path)
コード例 #9
0
def to_database(abbreviations, purge, safe):
    """
    Sync YAML files to DB.

    Runs for every given abbreviation (all of them when none are given).
    Municipalities are created in their own transaction; people
    (legislature/, executive/, municipalities/ and retired/) and
    organizations are then loaded in a second one.  With SAFE set,
    CancelTransaction is raised at the end of that second transaction block,
    and the process then exits with status 1.
    """
    init_django()
    create_parties()

    abbreviations = abbreviations or get_all_abbreviations()

    person_patterns = (
        "legislature/*.yml",
        "executive/*.yml",
        "municipalities/*.yml",
        "retired/*.yml",
    )

    for abbr in abbreviations:
        click.secho(f"==== {abbr} ====", bold=True)
        directory = get_data_dir(abbr)
        jurisdiction_id = get_jurisdiction_id(abbr)
        municipalities = load_municipalities(abbr)

        with transaction.atomic():
            create_municipalities(municipalities)

        person_files = []
        for pattern in person_patterns:
            person_files += glob.glob(os.path.join(directory, pattern))
        committee_files = glob.glob(
            os.path.join(directory, "organizations/*.yml"))

        if safe:
            click.secho("running in safe mode, no changes will be made",
                        fg="magenta")

        try:
            with transaction.atomic():
                for files, kind in ((person_files, "person"),
                                    (committee_files, "organization")):
                    load_directory(files, kind, jurisdiction_id, purge=purge)
                if safe:
                    click.secho("ran in safe mode, no changes were made",
                                fg="magenta")
                    # raising inside the atomic block aborts the transaction
                    raise CancelTransaction()
        except CancelTransaction:
            # NOTE(review): exiting here means later abbreviations are never
            # processed once CancelTransaction is raised -- confirm intended.
            sys.exit(1)
コード例 #10
0
def to_database(abbr, verbose, summary, purge, safe):
    """Load one state's people and organization YAML files into the database.

    People come from the state's people/ and retired/ directories and
    organizations from organizations/; both are loaded inside a single
    transaction.  With ``safe`` set, CancelTransaction is raised at the end
    of that transaction block and swallowed afterwards.  ``verbose`` and
    ``summary`` are accepted but not used by this function.
    """
    init_django()
    directory = get_data_dir(abbr)
    jurisdiction_id = get_jurisdiction_id(abbr)

    # current people first, then retired ones
    person_files = []
    for pattern in ('people/*.yml', 'retired/*.yml'):
        person_files += glob.glob(os.path.join(directory, pattern))
    committee_files = glob.glob(os.path.join(directory, 'organizations/*.yml'))

    if safe:
        click.secho('running in safe mode, no changes will be made', fg='magenta')

    try:
        with transaction.atomic():
            for files, kind in ((person_files, 'person'),
                                (committee_files, 'organization')):
                load_directory(files, kind, jurisdiction_id, purge=purge)
            if safe:
                click.secho('ran in safe mode, no changes were made', fg='magenta')
                # raising inside the atomic block aborts the transaction
                raise CancelTransaction()
    except CancelTransaction:
        # expected in safe mode; nothing further to do
        pass
コード例 #11
0
def to_yaml(input_dir, reset):
    """Convert the scraped data in INPUT_DIR into the state's data directory.

    The state abbreviation is the last non-empty '/'-separated piece of
    INPUT_DIR.  The people/ and organizations/ output directories are created
    when missing; when they already exist and RESET is truthy, the *.yml
    files inside them are removed before processing.
    """
    # TODO: remove reset option once we're in prod

    # last non-empty path piece is the abbreviation (None when there is none)
    abbr = next((part for part in reversed(input_dir.split('/')) if part), None)

    output_dir = get_data_dir(abbr)
    jurisdiction_id = get_jurisdiction_id(abbr)

    for subdir in ('people', 'organizations'):
        target = os.path.join(output_dir, subdir)
        try:
            os.makedirs(target)
        except FileExistsError:
            if not reset:
                continue
            for stale in glob.glob(os.path.join(target, '*.yml')):
                os.remove(stale)
    process_dir(input_dir, output_dir, jurisdiction_id)
コード例 #12
0
ファイル: new_person.py プロジェクト: resistbot/people
def create_person(fname, lname, name, state, district, party, rtype, url,
                  image, email, start_date):
    """Build a new person record and dump it into the directory for its role type.

    ``upper``/``lower``/``legislature`` roles go to ``legislature``,
    ``mayor`` to ``municipalities`` and ``governor``/``lt_governor`` to
    ``executive``; the latter two groups carry no ``district`` in the role.
    ``name`` is used as the display name when truthy, otherwise
    ``"<fname> <lname>"``.  ``url`` is recorded as both link and source.

    Raises:
        ValueError: if ``rtype`` is not one of the known role types.
    """
    role = {
        "type": rtype,
        "district": district,
        "jurisdiction": get_jurisdiction_id(state),
        "start_date": start_date,
    }

    # (role types, target directory, whether the role loses its district)
    placements = (
        (("upper", "lower", "legislature"), "legislature", False),
        (("mayor", ), "municipalities", True),
        (("governor", "lt_governor"), "executive", True),
    )
    for role_types, directory, drop_district in placements:
        if rtype in role_types:
            if drop_district:
                del role["district"]
            break
    else:
        raise ValueError(f"unknown role type {rtype}")

    person = OrderedDict()
    person["id"] = ocd_uuid("person")
    person["name"] = name or f"{fname} {lname}"
    person["given_name"] = fname
    person["family_name"] = lname
    person["image"] = image
    person["email"] = email
    person["party"] = [{"name": party}]
    person["roles"] = [role]
    person["links"] = [{"url": url}]
    person["sources"] = [{"url": url}]

    target_dir = os.path.join(get_data_dir(state), directory)
    dump_obj(person, output_dir=target_dir)
コード例 #13
0
def create_person(fname, lname, name, state, district, party, rtype, url, image,
                  start_date):
    """Build a new person record and dump it into the state's ``people`` directory.

    ``name`` is used as the display name when truthy; otherwise it is built
    as ``'<fname> <lname>'``.  ``url`` is recorded both as the only link and
    as the only source.  The finished record is handed to ``dump_obj``.
    """
    role = {
        'type': rtype,
        'district': district,
        'jurisdiction': get_jurisdiction_id(state),
        'start_date': start_date,
    }

    # a list of pairs spells out the key order of the record explicitly
    person = OrderedDict([
        ('id', ocd_uuid('person')),
        ('name', name or f'{fname} {lname}'),
        ('given_name', fname),
        ('family_name', lname),
        ('image', image),
        ('party', [{'name': party}]),
        ('roles', [role]),
        ('links', [{'url': url}]),
        ('sources', [{'url': url}]),
    ])

    people_dir = os.path.join(get_data_dir(state), 'people')
    dump_obj(person, output_dir=people_dir)
コード例 #14
0
def create_committee(*, name, state, parent, url):
    """Interactively collect committee members and dump a new committee record.

    Member names are prompted for one at a time until the literal ``done``
    is entered.  The record (classification ``committee``) is then handed to
    ``dump_obj`` for the state's ``organizations`` directory.

    Args:
        name: committee name.
        state: state abbreviation used for the jurisdiction id and data dir.
        parent: value stored under the record's ``parent`` key.
        url: recorded as both the only source and the only link.
    """
    members = []
    # click.prompt() is called without a default, so a blank answer is not
    # returned to us; only the 'done' sentinel ends the loop.  The
    # instruction shown to the user has to say so.
    click.echo("Enter members, enter 'done' to stop.")
    while True:
        mname = click.prompt("Member name ('done' to stop)")
        if mname == "done":
            break
        members.append({"name": mname})
    com = OrderedDict(
        {
            "id": ocd_uuid("organization"),
            "name": name,
            "classification": "committee",
            "jurisdiction": get_jurisdiction_id(state),
            "parent": parent,
            "sources": [{"url": url}],
            "links": [{"url": url}],
            "memberships": members,
        }
    )

    output_dir = get_data_dir(state)
    dump_obj(com, output_dir=os.path.join(output_dir, "organizations"))
コード例 #15
0
def check_historical_matches(abbr, dry=True):
    """Run ``update_objects`` over a state's unmatched voters and sponsorships.

    ``get_unmatched`` supplies both collections for the state's jurisdiction
    id; voters are processed first with the kind ``"vote"``, then
    sponsorships with ``"sponsorship"``.  ``dry`` is passed straight through
    to ``update_objects``.
    """
    jurisdiction_id = get_jurisdiction_id(abbr)
    voters, sponsorships = get_unmatched(jurisdiction_id)
    for unmatched, kind in ((voters, "vote"), (sponsorships, "sponsorship")):
        update_objects(jurisdiction_id, unmatched, kind, dry)
コード例 #16
0
ファイル: migrate_people.py プロジェクト: leehenderson/people
def process_old_file(filename, metadata):
    """Convert one legacy legislator JSON file into a record for ``retired``.

    The file is loaded, known-unused keys and every key starting with ``+``
    are dropped, and the remaining keys are consumed one by one into a new
    person record (name parts, parties, roles, image, link, sources, legacy
    identifiers).  The record is handed to ``dump_obj`` for the state's
    ``retired`` directory.

    Records flagged ``active`` are printed and skipped without writing
    anything.

    Args:
        filename: path of the legacy JSON file.
        metadata: mapping whose ``"terms"`` entry is passed to
            ``terms_to_roles`` together with the collected old roles.

    Raises:
        Exception: if ``leg_id`` and ``_id`` differ, if the record still has
            ``roles``, or if unrecognised keys are left over after conversion.
        KeyError: if a required legacy key is missing.
    """
    # Read through a context manager so the file handle is always closed.
    with open(filename) as f:
        data = json.load(f)

    if data["leg_id"] != data["_id"]:
        raise Exception(f"leg_id does not match _id in {filename}")
    if data.get("active"):
        # Active records are not converted; show them and move on.
        print(data)
        return
    if data.get("roles", []):
        raise Exception(f"unexpected current roles in {filename}")

    # remove unused fields
    for k in (
            "_yearly_contributions",
            "nimsp_candidate_id",
            "votesmart_id",
            "_contributions_start_year",
            "_scraped_name",
            "_total_contributions",
            "transparencydata_id",
            "_locked_fields",
            "level",
            "nimsp_id",
            "_type",
            "country",
            "updated_at",
            "_id",
            "active",
            "roles",
            "offices",
            "notice",
            "nickname",
            "district",
            "party",
            "chamber",
            "csrfmiddlewaretoken",
            "email",
            "created_at",
            "office_address",
            "office_phone",
            "occupation",
            "_guid",
            "_code",
            "all_ids",
            "2008-2011",
    ):
        data.pop(k, None)

    # remove plus fields (collect the keys first: the dict shrinks as we pop)
    for k in [k for k in data if k.startswith("+")]:
        data.pop(k)

    leg_obj = OrderedDict({"id": ocd_uuid("person")})

    leg_obj["name"] = data.pop("full_name")
    first_name = data.pop("first_name")
    middle_name = data.pop("middle_name")
    last_name = data.pop("last_name")
    # Either legacy key may carry the suffix; "suffixes" wins when both are
    # set, and a value present under only one of them is still kept.
    suffixes = data.pop("suffixes", "")
    suffix = suffixes or data.pop("suffix", "")
    data.pop("suffix", None)
    if first_name:
        leg_obj["given_name"] = first_name
    if last_name:
        leg_obj["family_name"] = last_name
    if middle_name:
        leg_obj["middle_name"] = middle_name
    if suffix:
        leg_obj["suffix"] = suffix

    state = data.pop("state")
    jurisdiction_id = get_jurisdiction_id(state)

    # Role types that are skipped when collecting old roles.
    skipped_role_types = (
        "committee member",
        "Minority Floor Leader",
        "Majority Floor Leader",
        "Majority Caucus Chair",
        "Minority Caucus Chair",
        "Speaker Pro Tem",
        "President Pro Tem",
        "Senate President",
        "Speaker of the House",
        "Minority Whip",
        "Majority Whip",
        "Lt. Governor",
    )

    # pull useful fields
    old_roles = data.pop("old_roles", {})
    parties = []  # distinct parties in first-seen order, for stable output
    new_roles = []
    for roles in old_roles.values():
        for role in roles:
            if role["type"] in skipped_role_types or role.get("committee"):
                continue
            if role["party"] not in parties:
                parties.append(role["party"])
            new_roles.append({
                "term": role["term"],
                "chamber": role["chamber"],
                "district": role["district"]
            })

    leg_obj["party"] = [{"name": party} for party in parties]

    # add these to leg_obj; each role spans whole calendar years
    leg_obj["roles"] = [
        OrderedDict({
            "district": district,
            "jurisdiction": jurisdiction_id,
            "type": chamber,
            "start_date": f"{start}-01-01",
            "end_date": f"{end}-12-31",
        })
        for chamber, district, start, end in terms_to_roles(new_roles, metadata["terms"])
    ]

    all_ids = data.pop("_all_ids")
    leg_id = data.pop("leg_id")
    if leg_id not in all_ids:
        all_ids.append(leg_id)

    image = data.pop("photo_url", "")
    if image:
        leg_obj["image"] = image
    url = data.pop("url", "")
    if url:
        leg_obj["links"] = [{"url": url}]
    leg_obj["sources"] = data.pop("sources")
    leg_obj["other_identifiers"] = [{
        "identifier": id_,
        "scheme": "legacy_openstates"
    } for id_ in all_ids]

    # Every key should have been consumed by now; anything left is a field
    # this conversion does not know about.
    if data:
        print(data)
        raise Exception(f"unhandled fields in {filename}: {sorted(data)}")

    output_dir = get_data_dir(state)
    dump_obj(leg_obj, output_dir=os.path.join(output_dir, "retired"))