def import_bills(state, data_dir):
    """Import every scraped bill JSON file for ``state`` into ``db.bills``.

    Files are read from ``<data_dir>/<state>/bills/*.json``. Each sponsor
    gains a ``leg_id`` key, and every name in a vote's ``yes_votes``,
    ``no_votes`` and ``other_votes`` lists is replaced by a
    ``{"name": ..., "leg_id": ...}`` dict; both use ``get_legislator_id``
    for the lookup. A bill with no stored match on state, session, chamber
    and bill_id is inserted with fresh timestamps; otherwise the stored
    document and the new data are handed to ``update``.
    """
    bill_glob = os.path.join(data_dir, state, "bills", "*.json")

    for filename in glob.iglob(bill_glob):
        with open(filename) as handle:
            data = prepare_obj(json.load(handle))

        existing = db.bills.find_one({
            "state": data["state"],
            "session": data["session"],
            "chamber": data["chamber"],
            "bill_id": data["bill_id"],
        })

        # Sponsors are looked up without a chamber (None is passed).
        for sponsor in data["sponsors"]:
            sponsor["leg_id"] = get_legislator_id(
                state, data["session"], None, sponsor["name"])

        # Voters are looked up within the chamber recorded on the vote.
        for vote in data["votes"]:
            for vote_type in ("yes_votes", "no_votes", "other_votes"):
                vote[vote_type] = [
                    {"name": voter,
                     "leg_id": get_legislator_id(
                         state, data["session"], vote["chamber"], voter)}
                    for voter in vote[vote_type]
                ]

        if existing:
            data["keywords"] = list(keywordize(data["title"]))
            update(existing, data, db.bills)
            continue

        # New bill: created_at and updated_at share one timestamp.
        stamp = datetime.datetime.now()
        data["created_at"] = stamp
        data["updated_at"] = stamp
        data["keywords"] = list(keywordize(data["title"]))
        insert_with_id(data)
def import_events(state, data_dir):
    """Import scraped event JSON files for ``state`` into ``db.events``.

    Files are read from ``<data_dir>/<state>/events/*.json``. A stored
    event is searched for first by ``_guid`` (when the scraped data has
    one) and then by the combination of state, when, end, type and
    description. Unmatched events are inserted with fresh UTC timestamps;
    matched ones are passed to ``update``. After all files are processed,
    ``actions_to_events(state)`` and ``ensure_indexes()`` are called.
    """
    event_glob = os.path.join(data_dir, state, 'events', '*.json')

    for filename in glob.iglob(event_glob):
        with open(filename) as handle:
            data = prepare_obj(json.load(handle))

        existing = None
        if '_guid' in data:
            existing = db.events.find_one({
                'state': data['state'],
                '_guid': data['_guid'],
            })

        # Fall back to matching on the event's descriptive fields.
        if not existing:
            existing = db.events.find_one({
                'state': data['state'],
                'when': data['when'],
                'end': data['end'],
                'type': data['type'],
                'description': data['description'],
            })

        if existing:
            update(existing, data, db.events)
        else:
            stamp = datetime.datetime.utcnow()
            data['created_at'] = stamp
            data['updated_at'] = stamp
            _insert_with_id(data)

    actions_to_events(state)
    ensure_indexes()
def import_legislators(state, data_dir):
    """Feed every scraped legislator file for ``state`` to import_legislator.

    Files are read from ``<data_dir>/<state>/legislators/*.json``; each is
    parsed as JSON, run through ``prepare_obj`` and then handed to
    ``import_legislator``.
    """
    legislator_glob = os.path.join(data_dir, state, 'legislators', '*.json')

    for filename in glob.iglob(legislator_glob):
        with open(filename) as handle:
            scraped = json.load(handle)
        import_legislator(prepare_obj(scraped))
def import_metadata(state, data_dir):
    """Load ``state_metadata.json`` for ``state`` and save it to ``db.metadata``.

    The file is read from ``<data_dir>/<state>/state_metadata.json``. The
    document is tagged with ``_type = 'metadata'`` before ``prepare_obj``
    runs and is keyed by ``_id = state`` before being saved.
    """
    metadata_path = os.path.join(data_dir, state, 'state_metadata.json')

    with open(metadata_path) as handle:
        raw = json.load(handle)

    raw['_type'] = 'metadata'
    document = prepare_obj(raw)
    document['_id'] = state

    db.metadata.save(document, safe=True)
def import_legislators(state, data_dir):
    """Import all scraped legislator files for ``state`` and report the count.

    Files matching ``<data_dir>/<state>/legislators/*.json`` are parsed,
    run through ``prepare_obj`` and passed to ``import_legislator``. The
    number of files found is printed, then ``activate_legislators(state)``
    is called.
    """
    legislator_glob = os.path.join(data_dir, state, 'legislators', '*.json')
    filenames = glob.glob(legislator_glob)

    for filename in filenames:
        with open(filename) as handle:
            scraped = json.load(handle)
        import_legislator(prepare_obj(scraped))

    # Single-argument print() emits the same text under Python 2.
    print('imported %s legislator files' % len(filenames))

    activate_legislators(state)
def import_votes(state, data_dir):
    """Merge scraped vote files for ``state`` into their bills in ``db.bills``.

    Files are read from ``<data_dir>/<state>/votes/*.json``. Each vote's
    ``bill_id`` is passed through ``fix_bill_id`` and used, with state,
    bill_chamber and session, to find the bill; votes whose bill is not
    found are logged and skipped. Voter names in ``yes_votes``,
    ``no_votes`` and ``other_votes`` become ``{'name', 'leg_id'}`` dicts.
    A stored vote with the same motion and date is updated in place;
    otherwise the vote is appended. The bill is saved after each file and
    the number of files is printed at the end.
    """
    vote_glob = os.path.join(data_dir, state, 'votes', '*.json')
    filenames = glob.glob(vote_glob)

    for filename in filenames:
        with open(filename) as handle:
            data = prepare_obj(json.load(handle))

        # Normalise bill_id so it matches the form stored in the database.
        data['bill_id'] = fix_bill_id(data['bill_id'])

        bill = db.bills.find_one({
            'state': state,
            'chamber': data['bill_chamber'],
            'session': data['session'],
            'bill_id': data['bill_id'],
        })
        if not bill:
            _log.warning("Couldn't find bill %s" % data['bill_id'])
            continue

        # These keys are dropped before the vote is embedded in the bill;
        # 'filename' may be absent.
        del data['bill_id']
        data.pop('filename', None)

        for vote_type in ('yes_votes', 'no_votes', 'other_votes'):
            data[vote_type] = [
                {'name': voter,
                 'leg_id': get_legislator_id(
                     state, data['session'], data['chamber'], voter)}
                for voter in data[vote_type]
            ]

        # First stored vote with the same motion and date, if any.
        stored = next(
            (candidate for candidate in bill['votes']
             if candidate['motion'] == data['motion']
             and candidate['date'] == data['date']),
            None)
        if stored is None:
            bill['votes'].append(data)
        else:
            stored.update(data)

        db.bills.save(bill, safe=True)

    print('imported %s vote files' % len(filenames))
def import_metadata(state, data_dir):
    """Replace the stored metadata for ``state`` while keeping preserved fields.

    Any field named in ``PRESERVED_FIELDS`` that exists on the currently
    stored metadata document is copied over the freshly loaded
    ``<data_dir>/<state>/state_metadata.json`` content before the document
    is saved to ``db.metadata`` under ``_id = state``.
    """
    # Read the stored document first so its preserved values survive.
    previous = db.metadata.find_one({'_id': state}) or {}
    carried_over = {
        field: previous[field]
        for field in PRESERVED_FIELDS
        if field in previous
    }

    metadata_path = os.path.join(data_dir, state, 'state_metadata.json')
    with open(metadata_path) as handle:
        raw = json.load(handle)

    raw['_type'] = 'metadata'
    document = prepare_obj(raw)
    document['_id'] = state
    document.update(carried_over)

    db.metadata.save(document, safe=True)
def import_committees(state, data_dir):
    """Attach committee-member roles to legislators from scraped committees.

    Files are read from ``<data_dir>/<state>/committees/*.json``. For each
    named member, legislators are searched by first and last name (as
    split by ``name_tools.split``) among those with a role in the latest
    term for ``state``. Ambiguous or missing matches are reported with
    ``print`` and skipped. When the matched legislator has no
    'committee member' role for this committee in the current term, one is
    appended, ``updated_at`` is refreshed and the legislator is saved.
    """
    committee_glob = os.path.join(data_dir, state, 'committees', '*.json')

    for filename in glob.iglob(committee_glob):
        with open(filename) as handle:
            data = prepare_obj(json.load(handle))

        # The latest term is looked up once per committee file.
        meta = db.metadata.find_one({'_id': state})
        current_term = meta['terms'][-1]['name']

        for member in data['members']:
            scraped_name = member['legislator']
            if not scraped_name:
                continue

            _prefix, first, last, _suffix = name_tools.split(scraped_name)

            found = db.legislators.find({
                'first_name': first,
                'last_name': last,
                'roles': {'$elemMatch': {'term': current_term,
                                         'state': state}},
            })

            if found.count() > 1:
                print("Too many matches for %s" % scraped_name)
                continue
            if found.count() == 0:
                print("No matches for %s" % scraped_name)
                continue

            legislator = found[0]

            already_member = any(
                role['type'] == 'committee member'
                and role['term'] == current_term
                and role['committee'] == data['name']
                for role in legislator['roles'])
            if already_member:
                continue

            legislator['roles'].append({
                'type': 'committee member',
                'committee': data['name'],
                'term': current_term,
                'chamber': data['chamber'],
            })
            legislator['updated_at'] = datetime.datetime.now()
            db.legislators.save(legislator)
def import_legislator(data):
    """Insert or update a single legislator document in ``db.legislators``.

    The scraped ``full_name`` is copied to ``_scraped_name`` and any
    ``role`` key inside a role dict is renamed to ``type``. A stored
    legislator is looked up by state, scraped name and a role matching the
    first scraped role's type (plus district and chamber when present) in
    the same, previous or next term, as given by ``get_previous_term`` and
    ``get_next_term``. Roles are shuffled into ``old_roles`` depending on
    which term the stored roles belong to, then ``update`` is called. With
    no match the data is inserted via ``insert_with_id``.
    """
    data = prepare_obj(data)
    data['_scraped_name'] = data['full_name']

    # Rename 'role' -> 'type'
    for role in data['roles']:
        if 'role' in role:
            role['type'] = role.pop('role')

    cur_role = data['roles'][0]
    term = cur_role['term']
    prev_term = get_previous_term(data['state'], term)
    next_term = get_next_term(data['state'], term)

    role_spec = {
        'state': data['state'],
        'type': cur_role['type'],
        'term': {'$in': [term, prev_term, next_term]},
    }
    # District and chamber only narrow the match when the role has them.
    for optional_key in ('district', 'chamber'):
        if optional_key in cur_role:
            role_spec[optional_key] = cur_role[optional_key]

    stored = db.legislators.find_one({
        'state': data['state'],
        '_scraped_name': data['full_name'],
        'roles': {'$elemMatch': role_spec},
    })

    if not stored:
        insert_with_id(data)
        return

    stored.setdefault('old_roles', {})

    stored_term = stored['roles'][0]['term']
    if stored_term == prev_term:
        # Stored roles are from the previous term: archive them.
        stored['old_roles'][stored_term] = stored['roles']
    elif stored_term == next_term:
        # Stored roles are newer: archive the scraped roles instead and
        # keep the stored ones as current.
        stored['old_roles'][term] = data['roles']
        data['roles'] = stored['roles']

    update(stored, data, db.legislators)
def import_votes(state, data_dir):
    """Merge scraped vote files for ``state`` into their bills in ``db.bills``.

    Files are read from ``<data_dir>/<state>/votes/*.json``. The bill is
    found by state, bill_chamber, session and bill_id; a vote whose bill
    is missing is logged with ``_log.warning`` and skipped. ``bill_id``
    and (if present) ``filename`` are removed from the vote, voter names
    become ``{"name", "leg_id"}`` dicts, and the vote either updates the
    stored vote with the same motion and date or is appended. The bill is
    saved after each file.
    """
    vote_glob = os.path.join(data_dir, state, "votes", "*.json")

    for filename in glob.iglob(vote_glob):
        with open(filename) as handle:
            data = prepare_obj(json.load(handle))

        bill_query = {
            "state": state,
            "chamber": data["bill_chamber"],
            "session": data["session"],
            "bill_id": data["bill_id"],
        }
        bill = db.bills.find_one(bill_query)
        if not bill:
            _log.warning("Couldn't find bill %s" % data["bill_id"])
            continue

        del data["bill_id"]
        # 'filename' may be absent; removing it is best-effort.
        data.pop("filename", None)

        for vote_type in ("yes_votes", "no_votes", "other_votes"):
            resolved = []
            for voter in data[vote_type]:
                leg_id = get_legislator_id(
                    state, data["session"], data["chamber"], voter)
                resolved.append({"name": voter, "leg_id": leg_id})
            data[vote_type] = resolved

        merged = False
        for stored in bill["votes"]:
            same_motion = stored["motion"] == data["motion"]
            if same_motion and stored["date"] == data["date"]:
                stored.update(data)
                merged = True
                break
        if not merged:
            bill["votes"].append(data)

        db.bills.save(bill, safe=True)
def import_votes(state, data_dir):
    """Merge scraped vote files for ``state`` into their bills in ``db.bills``.

    Files are read from ``<data_dir>/<state>/votes/*.json``. The bill is
    found by state, bill_chamber, session and bill_id; a vote whose bill
    is missing is logged with ``_log.warning`` and skipped. Voter names in
    ``yes_votes``, ``no_votes`` and ``other_votes`` are replaced with
    ``{'name', 'leg_id'}`` dicts resolved through ``get_legislator_id``.
    The vote updates the stored vote with the same motion and date, or is
    appended when there is none, and the bill is saved after each file.
    """
    data_dir = os.path.join(data_dir, state)
    pattern = os.path.join(data_dir, 'votes', '*.json')

    for path in glob.iglob(pattern):
        with open(path) as f:
            data = prepare_obj(json.load(f))

        bill = db.bills.find_one({'state': state,
                                  'chamber': data['bill_chamber'],
                                  'session': data['session'],
                                  'bill_id': data['bill_id']})

        if not bill:
            _log.warning("Couldn't find bill %s" % data['bill_id'])
            continue

        del data['bill_id']
        # A vote file without a 'filename' key used to abort the whole
        # import with KeyError; removing the key is now best-effort.
        data.pop('filename', None)

        for vtype in ('yes_votes', 'no_votes', 'other_votes'):
            svlist = []
            for svote in data[vtype]:
                leg_id = get_legislator_id(state, data['session'],
                                           data['chamber'], svote)
                svlist.append({'name': svote, 'leg_id': leg_id})

            data[vtype] = svlist

        for vote in bill['votes']:
            if (vote['motion'] == data['motion']
                    and vote['date'] == data['date']):
                vote.update(data)
                break
        else:
            # No stored vote with this motion and date yet.
            bill['votes'].append(data)

        db.bills.save(bill, safe=True)
def import_committees(state, data_dir):
    """Import committees for ``state`` and link them to legislators.

    Two modes, selected by whether any file matches
    ``<data_dir>/<state>/committees/*.json``:

    * No files: committees are derived from the 'committee member' roles
      of legislators who have a role in the latest term. A committee
      document is created when none matches the role's state, chamber,
      committee (and subcommittee, if any); the legislator is added to its
      members if missing, and the role receives the ``committee_id``.
    * Files present: each file is inserted or passed to ``update``
      against the committee matching state, chamber, committee (and
      subcommittee). Each named member is resolved with
      ``get_legislator_id`` for the latest session; unresolved members are
      reported with ``print`` and get ``leg_id = None``. Resolved
      legislators gain a 'committee member' role for the current term when
      they do not already have one for this committee.

    Afterwards the number of files is printed and ``link_parents(state)``
    and ``ensure_indexes()`` are called.
    """
    data_dir = os.path.join(data_dir, state)
    pattern = os.path.join(data_dir, "committees", "*.json")

    meta = db.metadata.find_one({"_id": state})
    latest_term = meta["terms"][-1]
    current_term = latest_term["name"]
    current_session = latest_term["sessions"][-1]

    paths = glob.glob(pattern)

    if not paths:
        # Not standalone committees
        current_legislators = db.legislators.find(
            {"roles": {"$elemMatch": {"term": current_term, "state": state}}})
        for legislator in current_legislators:
            for role in legislator["roles"]:
                if (role["type"] != "committee member"
                        or "committee_id" in role):
                    continue

                spec = {"state": role["state"],
                        "chamber": role["chamber"],
                        "committee": role["committee"]}
                if "subcommittee" in role:
                    spec["subcommittee"] = role["subcommittee"]

                committee = db.committees.find_one(spec)
                if not committee:
                    committee = spec
                    committee["_type"] = "committee"
                    committee["members"] = []
                    committee["sources"] = []
                    insert_with_id(committee)

                for member in committee["members"]:
                    if member["leg_id"] == legislator["leg_id"]:
                        break
                else:
                    committee["members"].append(
                        {"name": legislator["full_name"],
                         "leg_id": legislator["leg_id"],
                         "role": "member"})
                    db.committees.save(committee, safe=True)

                # Link the role whether or not the membership entry already
                # existed, so a role that lacks committee_id is not left
                # unlinked when the committee already lists the legislator.
                role["committee_id"] = committee["_id"]

            db.legislators.save(legislator, safe=True)

    for path in paths:
        with open(path) as f:
            data = prepare_obj(json.load(f))

        spec = {"state": state,
                "chamber": data["chamber"],
                "committee": data["committee"]}
        if "subcommittee" in data:
            spec["subcommittee"] = data["subcommittee"]

        committee = db.committees.find_one(spec)
        if not committee:
            insert_with_id(data)
            committee = data
        else:
            update(committee, data, db.committees)

        for member in committee["members"]:
            if not member["name"]:
                continue

            leg_id = get_legislator_id(state, current_session,
                                       data["chamber"], member["name"])
            if not leg_id:
                print("No matches for %s"
                      % member["name"].encode("ascii", "ignore"))
                member["leg_id"] = None
                continue

            legislator = db.legislators.find_one({"_id": leg_id})
            member["leg_id"] = leg_id

            for role in legislator["roles"]:
                # .get(): committee-member roles that have not been linked
                # yet carry no committee_id and must not raise KeyError.
                if (role["type"] == "committee member"
                        and role["term"] == current_term
                        and role.get("committee_id") == committee["_id"]):
                    break
            else:
                new_role = {"type": "committee member",
                            "committee": committee["committee"],
                            "term": current_term,
                            "chamber": committee["chamber"],
                            "committee_id": committee["_id"],
                            "state": state}
                if "subcommittee" in committee:
                    new_role["subcommittee"] = committee["subcommittee"]
                legislator["roles"].append(new_role)
                legislator["updated_at"] = datetime.datetime.utcnow()
                db.legislators.save(legislator, safe=True)

        # Persist the leg_id values assigned to the members above.
        db.committees.save(committee, safe=True)

    print("imported %s committee files" % len(paths))

    link_parents(state)
    ensure_indexes()
def import_bills(state, data_dir):
    """Import every scraped bill JSON file for ``state`` into ``db.bills``.

    Files are read from ``<data_dir>/<state>/bills/*.json``. For each bill:
    sponsors and voters are annotated with ``leg_id`` values from
    ``get_legislator_id``; ``_term`` is set from the session-to-term
    mapping built out of the state's metadata; titles found on versions
    (``title`` and ``+short_title``) are merged into ``alternate_titles``
    with the primary title excluded; and ``_keywords`` is computed with
    ``bill_keywords``. Bills with no stored match on state, session,
    chamber and bill_id are inserted with fresh timestamps; others go
    through ``update``. Finally ``populate_current_fields(state)`` and
    ``ensure_indexes()`` are called.
    """
    bill_glob = os.path.join(data_dir, state, 'bills', '*.json')

    meta = db.metadata.find_one({'_id': state})

    # Build a session to term mapping
    term_for_session = {
        session: term['name']
        for term in meta['terms']
        for session in term['sessions']
    }

    for filename in glob.iglob(bill_glob):
        with open(filename) as handle:
            data = prepare_obj(json.load(handle))

        existing = db.bills.find_one({
            'state': data['state'],
            'session': data['session'],
            'chamber': data['chamber'],
            'bill_id': data['bill_id'],
        })

        # Sponsors are looked up without a chamber (None is passed).
        for sponsor in data['sponsors']:
            sponsor['leg_id'] = get_legislator_id(
                state, data['session'], None, sponsor['name'])

        for vote in data['votes']:
            for vote_type in ('yes_votes', 'no_votes', 'other_votes'):
                vote[vote_type] = [
                    {'name': voter,
                     'leg_id': get_legislator_id(
                         state, data['session'], vote['chamber'], voter)}
                    for voter in vote[vote_type]
                ]

        data['_term'] = term_for_session[data['session']]

        # Merge any version titles into the alternate_titles list
        titles = set(data['alternate_titles'])
        for version in data['versions']:
            for title_key in ('title', '+short_title'):
                if title_key in version:
                    titles.add(version[title_key])
        # Make sure the primary title isn't included in the alternate
        # title list
        titles.discard(data['title'])
        data['alternate_titles'] = list(titles)

        if existing:
            data['_keywords'] = list(bill_keywords(data))
            update(existing, data, db.bills)
            continue

        # New bill: created_at and updated_at share one timestamp.
        stamp = datetime.datetime.now()
        data['created_at'] = stamp
        data['updated_at'] = stamp
        data['_keywords'] = list(bill_keywords(data))
        insert_with_id(data)

    populate_current_fields(state)
    ensure_indexes()
def import_committees(state, data_dir):
    """Import committees for ``state`` and link them to legislators.

    Two modes, selected by whether any file matches
    ``<data_dir>/<state>/committees/*.json``:

    * No files: committees are derived from the 'committee member' roles
      of legislators who have a role in the latest term. A committee
      document is created when none matches the role's state, chamber,
      committee (and subcommittee, if any); the legislator is added to its
      members if missing, and the role receives the ``committee_id``.
    * Files present: each file is inserted or passed to ``update``
      against the committee matching state, chamber, committee (and
      subcommittee). Each named member is matched to a legislator by first
      and last name (as split by ``name_tools.split``) among those with a
      role in the latest term; ambiguous or missing matches are reported
      with ``print`` and skipped. Matched legislators gain a
      'committee member' role for the current term when they do not
      already have one for this committee.

    ``ensure_indexes()`` is called at the end.
    """
    data_dir = os.path.join(data_dir, state)
    pattern = os.path.join(data_dir, 'committees', '*.json')

    meta = db.metadata.find_one({'_id': state})
    current_term = meta['terms'][-1]['name']

    paths = glob.glob(pattern)

    if not paths:
        # Not standalone committees
        current_legislators = db.legislators.find({
            'roles': {'$elemMatch': {'term': current_term,
                                     'state': state}}})
        for legislator in current_legislators:
            for role in legislator['roles']:
                if (role['type'] != 'committee member'
                        or 'committee_id' in role):
                    continue

                spec = {'state': role['state'],
                        'chamber': role['chamber'],
                        'committee': role['committee']}
                if 'subcommittee' in role:
                    spec['subcommittee'] = role['subcommittee']

                committee = db.committees.find_one(spec)
                if not committee:
                    committee = spec
                    committee['_type'] = 'committee'
                    committee['members'] = []
                    insert_with_id(committee)

                for member in committee['members']:
                    if member['leg_id'] == legislator['leg_id']:
                        break
                else:
                    committee['members'].append(
                        {'name': legislator['full_name'],
                         'leg_id': legislator['leg_id'],
                         'role': 'member'})
                    db.committees.save(committee, safe=True)

                # Link the role whether or not the membership entry already
                # existed, so a role that lacks committee_id is not left
                # unlinked when the committee already lists the legislator.
                role['committee_id'] = committee['_id']

            db.legislators.save(legislator, safe=True)

    for path in paths:
        with open(path) as f:
            data = prepare_obj(json.load(f))

        # Chamber is part of the key (as in the legislator-derived spec
        # above) so same-named committees in different chambers stay
        # separate documents.
        spec = {'state': state,
                'chamber': data['chamber'],
                'committee': data['committee']}
        if 'subcommittee' in data:
            spec['subcommittee'] = data['subcommittee']

        committee = db.committees.find_one(spec)
        if not committee:
            insert_with_id(data)
            committee = data
        else:
            update(committee, data, db.committees)

        for member in committee['members']:
            if not member['legislator']:
                continue

            (_pre, first, last, _suff) = name_tools.split(
                member['legislator'])

            found = db.legislators.find({
                'first_name': first,
                'last_name': last,
                'roles': {'$elemMatch': {'term': current_term,
                                         'state': state}}})

            # Count once; each count() call is a separate server query.
            match_count = found.count()
            if match_count > 1:
                print("Too many matches for %s"
                      % member['legislator'].encode('ascii', 'ignore'))
                continue
            elif match_count == 0:
                print("No matches for %s"
                      % member['legislator'].encode('ascii', 'ignore'))
                continue

            legislator = found[0]
            member['leg_id'] = legislator['_id']

            for role in legislator['roles']:
                # .get(): committee-member roles that have not been linked
                # yet carry no committee_id and must not raise KeyError.
                if (role['type'] == 'committee member'
                        and role['term'] == current_term
                        and role.get('committee_id') == committee['_id']):
                    break
            else:
                new_role = {'type': 'committee member',
                            'committee': committee['committee'],
                            'term': current_term,
                            'chamber': committee['chamber'],
                            'committee_id': committee['_id'],
                            'state': state}
                if 'subcommittee' in committee:
                    new_role['subcommittee'] = committee['subcommittee']
                legislator['roles'].append(new_role)
                legislator['updated_at'] = datetime.datetime.now()
                db.legislators.save(legislator, safe=True)

        # Persist the leg_id values assigned to the members above.
        db.committees.save(committee, safe=True)

    ensure_indexes()