Ejemplo n.º 1
0
def load_new_files(state):
    """Return the set of ``legacy_openstates`` identifiers on file for *state*.

    Both the "people" and the "retired" collections produced by
    ``iter_objects`` are scanned.  Each yielded item is unpacked as a
    ``(data, _)`` pair; only ``data`` is inspected, via its optional
    ``other_identifiers`` list.
    """
    records = itertools.chain(
        iter_objects(state, "people"),
        iter_objects(state, "retired"),
    )
    return {
        entry["identifier"]
        for record, _ in records
        for entry in record.get("other_identifiers", [])
        if entry["scheme"] == "legacy_openstates"
    }
Ejemplo n.º 2
0
def generate_template_csv(abbreviations, filename, missing_id=None):
    """Write a CSV template with one row per person, based on their active role.

    Args:
        abbreviations: iterable of abbreviations; each one is passed to
            ``iter_objects(abbr, 'people')`` to enumerate people.
        filename: path of the CSV file to write (opened in ``'w'`` mode, so an
            existing file is overwritten).
        missing_id: optional identifier scheme.  When given, people that
            already carry an ``other_identifiers`` entry with this scheme are
            left out of the output.

    Raises:
        Exception: if a person that should be written has no active role.
    """
    fields = ('id', 'name', 'chamber', 'district', 'jurisdiction')

    # newline='' is required by the csv module so that it, and not the text
    # layer, controls line endings (otherwise rows gain an extra '\r' on
    # platforms that translate '\n').
    with open(filename, 'w', newline='') as outfile:
        out = csv.DictWriter(outfile, fields)
        out.writeheader()

        for abbr in abbreviations:
            # person_filename deliberately does not reuse the name of the
            # ``filename`` parameter (the output path).
            for person, person_filename in iter_objects(abbr, 'people'):
                if missing_id and any(
                        oid['scheme'] == missing_id
                        for oid in person.get('other_identifiers', [])):
                    continue

                # first active role wins
                active_role = next(
                    (role for role in person['roles'] if role_is_active(role)),
                    None)
                if active_role is None:
                    raise Exception(
                        f"no active role for {person['id']} ({person_filename})")

                out.writerow({
                    'id': person['id'],
                    'name': person['name'],
                    'chamber': active_role['type'],
                    'district': active_role['district'],
                    'jurisdiction': active_role['jurisdiction'],
                })
Ejemplo n.º 3
0
def generate_template_csv(abbreviations, filename, missing_id=None):
    """Write a CSV template with one row per person, based on their active role.

    Args:
        abbreviations: iterable of abbreviations; each one is passed to
            ``iter_objects(abbr, "people")`` to enumerate people.
        filename: path of the CSV file to write (opened in ``"w"`` mode, so an
            existing file is overwritten).
        missing_id: optional identifier scheme.  When given, people that
            already carry an ``other_identifiers`` entry with this scheme are
            left out of the output.

    Raises:
        Exception: if a person that should be written has no active role.
    """
    fields = ("id", "name", "chamber", "district", "jurisdiction")

    # newline="" is required by the csv module so that it, and not the text
    # layer, controls line endings (otherwise rows gain an extra "\r" on
    # platforms that translate "\n").
    with open(filename, "w", newline="") as outfile:
        out = csv.DictWriter(outfile, fields)
        out.writeheader()

        for abbr in abbreviations:
            # person_path deliberately does not reuse the name of the
            # ``filename`` parameter (the output path).
            for person, person_path in iter_objects(abbr, "people"):
                already_has_id = bool(missing_id) and any(
                    oid["scheme"] == missing_id
                    for oid in person.get("other_identifiers", [])
                )
                if already_has_id:
                    continue

                # first active role wins
                active_role = next(
                    (role for role in person["roles"] if role_is_active(role)),
                    None,
                )
                if active_role is None:
                    raise Exception(
                        f"no active role for {person['id']} ({person_path})"
                    )

                out.writerow({
                    "id": person["id"],
                    "name": person["name"],
                    "chamber": active_role["type"],
                    "district": active_role["district"],
                    "jurisdiction": active_role["jurisdiction"],
                })
Ejemplo n.º 4
0
def download_state_images(abbr, skip_existing):
    """Upload the original and a 200px copy of each person's image for *abbr*.

    People are enumerated with ``iter_objects(abbr, "legislature")``; anyone
    without an ``image`` URL is ignored.  ``skip_existing`` is forwarded
    unchanged to every ``upload`` call.
    """
    for legislator, _ in iter_objects(abbr, "legislature"):
        image_url = legislator.get("image")
        person_id = legislator["id"]

        if image_url:
            original = upload(
                lambda: download_image(image_url),
                f"images/original/{person_id}",
                skip_existing,
            )
            # A falsy result means the original was skipped, so there is
            # nothing to resize either; newly added profiles therefore need a
            # run with --no-skip-existing.
            if original:
                # resize so that the largest dimension is 200px
                upload(
                    lambda: resize_image(original, 200),
                    f"images/small/{person_id}",
                    skip_existing,
                )
Ejemplo n.º 5
0
def dir_to_mongo(abbr, create, clear_old_roles, verbose):
    """Sync the people files for ``abbr`` into the legacy ``fiftystates`` MongoDB.

    For every person yielded by ``iter_objects(abbr, 'people')`` a legacy
    legislator document is built and saved to ``db.legislators`` when it
    differs from the stored one.  Afterwards every stored legislator of the
    state whose ``_id`` was not seen is passed to ``retire_person``.

    Args:
        abbr: abbreviation used as the metadata ``_id`` and the ``state`` field.
        create: when truthy, a person without a ``legacy_openstates``
            identifier gets one from ``get_next_id`` and the person file is
            rewritten with ``dump_obj``; otherwise the process exits with
            status 1.
        clear_old_roles: when truthy, ``old_roles`` of an existing document are
            not carried over.
        verbose: when truthy, also report people that need no update.

    The MongoDB host is read from ``BILLY_MONGO_HOST`` (default ``localhost``).
    Exits with status 1 if a person has no legacy ID (and ``create`` is falsy)
    or has no active role / party.
    """
    db = pymongo.MongoClient(os.environ.get('BILLY_MONGO_HOST',
                                            'localhost'))['fiftystates']

    metadata = db.metadata.find({'_id': abbr})[0]
    latest_term = metadata['terms'][-1]['name']

    active_ids = []

    for person, filename in iter_objects(abbr, 'people'):

        legacy_ids = [
            oid['identifier'] for oid in person.get('other_identifiers', [])
            if oid['scheme'] == 'legacy_openstates'
        ]
        if not legacy_ids:
            if create:
                # get next ID and persist it in the person file
                new_id = get_next_id(db, abbr)
                legacy_ids = [new_id]
                if 'other_identifiers' not in person:
                    person['other_identifiers'] = []
                person['other_identifiers'].append({
                    'scheme': 'legacy_openstates',
                    'identifier': new_id
                })
                dump_obj(person, filename=filename)
            else:
                click.secho(
                    f'{filename} does not have legacy ID, run with --create',
                    fg='red')
                sys.exit(1)

        active_ids.append(legacy_ids[0])

        # handle name (the prefix is not stored in the legacy document)
        _prefix, first_name, last_name, suffixes = name_tools.split(
            person['name'])

        # get chamber, district, party
        # Reset for every person: otherwise someone without an active role or
        # party would silently reuse the values of the previous person.
        chamber = district = party = None
        for role in person['roles']:
            if role_is_active(role):
                chamber = role['type']
                district = role['district']
                break
        # no break here: the last active party entry wins
        for role in person['party']:
            if role_is_active(role):
                party = role['name']

        if chamber is None or party is None:
            click.secho(
                f'{filename} does not have an active role and party',
                fg='red')
            sys.exit(1)

        url = person['links'][0]['url']
        email = ''

        offices = []
        for cd in person.get('contact_details', []):
            office = {
                'fax': cd.get('fax'),
                'phone': cd.get('voice'),
                'address': cd.get('address'),
                'email': cd.get('email'),
                'name': cd['note'],
                'type':
                'capitol' if 'capitol' in cd['note'].lower() else 'district',
            }
            offices.append(office)
            # the first office email found becomes the person's email
            if office['email'] and not email:
                email = office['email']

        # NE & DC: the single 'legislature' chamber is stored as 'upper'
        if chamber == 'legislature':
            chamber = 'upper'

        # get some old data to keep around
        created_at = datetime.datetime.utcnow()
        old_roles = {}
        old_person = None
        try:
            old_person = db.legislators.find({'_id': legacy_ids[0]})[0]
            created_at = old_person['created_at']
            if not clear_old_roles:
                old_roles = old_person.get('old_roles', {})
        except IndexError:
            # no existing document for this ID; keep the defaults above
            pass

        mongo_person = {
            '_id': legacy_ids[0],
            'leg_id': legacy_ids[0],
            '_all_ids': legacy_ids,
            '_type': 'person',
            'active': True,
            'full_name': person['name'],
            '_scraped_name': person['name'],
            'photo_url': person.get('image'),
            'state': abbr,
            'district': district,
            'chamber': chamber,
            'party': party,
            'email': email,
            'url': url,
            'offices': offices,
            'created_at': created_at,
            'first_name': first_name,
            'middle_name': '',
            'last_name': last_name,
            'suffixes': suffixes,
            'sources': person['sources'],
            'old_roles': old_roles,
            'roles': [
                {
                    'term': latest_term,
                    'district': district,
                    'chamber': chamber,
                    'state': abbr,
                    'party': party,
                    'type': 'member',
                    'start_date': None,
                    'end_date': None
                },
            ],
        }
        # TODO: committee info
        # { "term" : "2017-2018", "committee_id" : "NCC000233", "chamber" : "lower",
        # "state" : "nc", "subcommittee" : null, "committee" : "State and Local Government II",
        # "position" : "member", "type" : "committee member" },

        # compare, ignoring updated_at since the new document has none yet
        if old_person:
            old_person.pop('updated_at', None)
        if old_person == mongo_person:
            if verbose:
                click.secho(f'no updates to {mongo_person["_id"]}')
        else:
            click.secho(f'updating {mongo_person["_id"]}', fg='green')
            mongo_person['updated_at'] = datetime.datetime.utcnow()
            # NOTE(review): Collection.save() (and Cursor.count() below) are
            # legacy PyMongo APIs that were removed in PyMongo 4 -- confirm
            # the pinned driver version before upgrading.
            try:
                db.legislators.save(mongo_person)
            except Exception as e:
                # best effort: report the failure and move on to the next person
                print(e)
                continue

    # everyone stored for this state that was not seen above gets retired
    to_retire = db.legislators.find({
        '_id': {
            '$nin': active_ids
        },
        'state': abbr
    })
    click.secho(f'going to try to retire {to_retire.count()}')
    for leg in to_retire:
        retire_person(db, leg)