def test_fix_bill_id():
    """Check that each spelling variant is normalised to ``'AB 74'``."""
    canonical = 'AB 74'
    # Variants differ in dots, spacing (including a tab) and zero padding.
    variants = (
        'A.B. 74',
        'A.B.74',
        'AB74',
        'AB 0074',
        'AB074',
        'A.B.074',
        'A.B. 074',
        'A.B\t074',
    )

    for raw in variants:
        assert utils.fix_bill_id(raw) == canonical
def import_votes(state, data_dir):
    """Merge the vote JSON files for ``state`` into their bill documents.

    Reads every ``<data_dir>/<state>/votes/*.json`` file. For each one the
    matching bill is looked up in ``db.bills`` by state, ``bill_chamber``,
    session and normalised bill id. Files whose bill cannot be found are
    logged and skipped. Otherwise the vote either updates an existing entry
    in ``bill['votes']`` with the same motion and date, or is appended as a
    new entry, and the bill is saved.

    Prints the number of vote files found (skipped files are included in
    that count).
    """
    state_dir = os.path.join(data_dir, state)
    vote_files = glob.glob(os.path.join(state_dir, 'votes', '*.json'))

    for vote_file in vote_files:
        with open(vote_file) as handle:
            data = prepare_obj(json.load(handle))

        # clean up bill_id, needs to match the one already in the database
        bill_id = fix_bill_id(data['bill_id'])
        data['bill_id'] = bill_id

        bill = db.bills.find_one({
            'state': state,
            'chamber': data['bill_chamber'],
            'session': data['session'],
            'bill_id': bill_id,
        })
        if not bill:
            _log.warning("Couldn't find bill %s" % bill_id)
            continue

        # These keys are dropped before the vote is embedded in the bill;
        # 'filename' may legitimately be absent.
        del data['bill_id']
        data.pop('filename', None)

        # Replace each list of voter names with name/leg_id records.
        for roll in ('yes_votes', 'no_votes', 'other_votes'):
            data[roll] = [
                {'name': voter,
                 'leg_id': get_legislator_id(state, data['session'],
                                             data['chamber'], voter)}
                for voter in data[roll]
            ]

        # A vote already on the bill with the same motion and date is
        # updated in place; otherwise this one is appended.
        for existing in bill['votes']:
            if (existing['motion'] != data['motion']
                    or existing['date'] != data['date']):
                continue
            existing.update(data)
            break
        else:
            bill['votes'].append(data)

        db.bills.save(bill, safe=True)

    # Single-argument parenthesised form prints the same text on Python 2.
    print('imported %s vote files' % len(vote_files))
def import_bills(state, data_dir):
    """Import the bill JSON files for ``state`` into ``db.bills``.

    Reads every ``<data_dir>/<state>/bills/*.json`` file and, for each bill:

    * normalises ``bill_id`` and moves a non-empty ``subjects`` value to
      ``scraped_subjects``;
    * attaches a ``leg_id`` to every sponsor and to every name in each
      vote's yes/no/other lists, and a ``committee_id`` to votes that
      carry a ``committee``;
    * records the term the bill's session belongs to as ``_term``;
    * folds version titles into ``alternate_titles``, excluding the
      primary title;
    * inserts the bill if no matching document exists, otherwise updates
      the existing one.

    Afterwards prints the number of files found, then calls
    ``populate_current_fields(state)`` and ``ensure_indexes()``.
    """
    state_dir = os.path.join(data_dir, state)
    pattern = os.path.join(state_dir, 'bills', '*.json')

    meta = db.metadata.find_one({'_id': state})

    # Build a session to term mapping
    term_for_session = dict(
        (session, term['name'])
        for term in meta['terms']
        for session in term['sessions']
    )

    bill_files = glob.glob(pattern)

    for bill_file in bill_files:
        with open(bill_file) as handle:
            data = prepare_obj(json.load(handle))

        # clean up bill_id
        data['bill_id'] = fix_bill_id(data['bill_id'])

        # 'subjects' is always removed; it is kept under a different key
        # only when it is non-empty.
        scraped = data.pop('subjects', None)
        if scraped:
            data['scraped_subjects'] = scraped

        existing = db.bills.find_one({
            'state': data['state'],
            'session': data['session'],
            'chamber': data['chamber'],
            'bill_id': data['bill_id'],
        })

        for sponsor in data['sponsors']:
            sponsor['leg_id'] = get_legislator_id(
                state, data['session'], None, sponsor['name'])

        for vote in data['votes']:
            if 'committee' in vote:
                vote['committee_id'] = get_committee_id(
                    state, vote['chamber'], vote['committee'])

            # Replace each list of voter names with name/leg_id records.
            for roll in ('yes_votes', 'no_votes', 'other_votes'):
                vote[roll] = [
                    {'name': voter,
                     'leg_id': get_legislator_id(state, data['session'],
                                                 vote['chamber'], voter)}
                    for voter in vote[roll]
                ]

        data['_term'] = term_for_session[data['session']]

        # Merge any version titles into the alternate_titles list
        titles = set(data.get('alternate_titles', []))
        for version in data['versions']:
            for key in ('title', '+short_title'):
                if key in version:
                    titles.add(version[key])
        # Make sure the primary title isn't included in the alternate
        # title list; a bill without a 'title' key is left untouched.
        if 'title' in data:
            titles.discard(data['title'])
        data['alternate_titles'] = list(titles)

        is_new = not existing
        if is_new:
            # Timestamps are set before the keywords are computed, as
            # bill_keywords() receives the whole document.
            created = datetime.datetime.utcnow()
            data['created_at'] = created
            data['updated_at'] = created

        data['_keywords'] = list(bill_keywords(data))

        if is_new:
            insert_with_id(data)
        else:
            update(existing, data, db.bills)

    # Single-argument parenthesised form prints the same text on Python 2.
    print('imported %s bill files' % len(bill_files))

    populate_current_fields(state)
    ensure_indexes()