Exemple #1
0
def test_fix_bill_id():
    """Check that ``utils.fix_bill_id`` maps each spelling variant below
    (dots, missing or tab whitespace, zero padding) to ``'AB 74'``."""
    variants = (
        'A.B. 74', 'A.B.74', 'AB74', 'AB 0074',
        'AB074', 'A.B.074', 'A.B. 074', 'A.B\t074',
    )
    normalized = 'AB 74'

    for raw_id in variants:
        assert utils.fix_bill_id(raw_id) == normalized
Exemple #2
0
def import_votes(state, data_dir):
    """Merge the scraped vote JSON files for ``state`` into ``db.bills``.

    Reads every ``<data_dir>/<state>/votes/*.json`` file.  Each vote is
    attached to the bill it names: a vote already on the bill with the
    same motion and date is updated in place, otherwise the vote is
    appended.  Votes whose bill cannot be found are logged and skipped.
    """
    paths = glob.glob(os.path.join(data_dir, state, 'votes', '*.json'))

    for path in paths:
        with open(path) as vote_file:
            data = prepare_obj(json.load(vote_file))

        # Normalize the id so it matches the one already in the database.
        data['bill_id'] = fix_bill_id(data['bill_id'])

        bill_query = {'state': state,
                      'chamber': data['bill_chamber'],
                      'session': data['session'],
                      'bill_id': data['bill_id']}
        bill = db.bills.find_one(bill_query)

        if not bill:
            message = "Couldn't find bill %s" % data['bill_id']
            _log.warning(message)
            continue

        # These keys were only needed to locate the bill / file; drop them
        # before the vote is stored on the bill.
        del data['bill_id']
        data.pop('filename', None)

        # Replace each bare voter name with a name + legislator id record.
        for vtype in ('yes_votes', 'no_votes', 'other_votes'):
            data[vtype] = [
                {'name': voter,
                 'leg_id': get_legislator_id(state, data['session'],
                                             data['chamber'], voter)}
                for voter in data[vtype]
            ]

        for existing in bill['votes']:
            is_same_vote = (existing['motion'] == data['motion']
                            and existing['date'] == data['date'])
            if is_same_vote:
                existing.update(data)
                break
        else:
            # No vote with this motion and date yet: record it as new.
            bill['votes'].append(data)

        db.bills.save(bill, safe=True)

    # Counts every file found, including ones skipped for a missing bill.
    print('imported %s vote files' % len(paths))
Exemple #3
0
def import_bills(state, data_dir):
    """Import the scraped bill JSON files for ``state`` into ``db.bills``.

    Reads every ``<data_dir>/<state>/bills/*.json`` file.  Each bill gets a
    normalized ``bill_id``, legislator ids on its sponsors and vote rolls,
    committee ids on votes that name a committee, a ``_term`` looked up
    from the state's metadata, a merged ``alternate_titles`` list and a
    ``_keywords`` list.  A bill not yet in the database is inserted with
    creation/update timestamps; one already there is updated.  Afterwards
    ``populate_current_fields`` and ``ensure_indexes`` are run.
    """
    bill_glob = os.path.join(data_dir, state, 'bills', '*.json')

    metadata = db.metadata.find_one({'_id': state})

    # Map each session to the name of the term that lists it.
    term_for_session = dict(
        (session, term['name'])
        for term in metadata['terms']
        for session in term['sessions']
    )

    paths = glob.glob(bill_glob)

    for path in paths:
        with open(path) as bill_file:
            data = prepare_obj(json.load(bill_file))

        # Normalize the bill id.
        data['bill_id'] = fix_bill_id(data['bill_id'])

        # Non-empty subjects from the file are kept under 'scraped_subjects'.
        scraped = data.pop('subjects', None)
        if scraped:
            data['scraped_subjects'] = scraped

        existing = db.bills.find_one({'state': data['state'],
                                      'session': data['session'],
                                      'chamber': data['chamber'],
                                      'bill_id': data['bill_id']})

        for sponsor in data['sponsors']:
            sponsor['leg_id'] = get_legislator_id(state, data['session'],
                                                  None, sponsor['name'])

        for vote in data['votes']:
            if 'committee' in vote:
                vote['committee_id'] = get_committee_id(state,
                                                        vote['chamber'],
                                                        vote['committee'])

            # Replace each bare voter name with a name + legislator id record.
            for vtype in ('yes_votes', 'no_votes', 'other_votes'):
                vote[vtype] = [
                    {'name': voter,
                     'leg_id': get_legislator_id(state, data['session'],
                                                 vote['chamber'], voter)}
                    for voter in vote[vtype]
                ]

        data['_term'] = term_for_session[data['session']]

        # Fold the titles carried by the versions into the alternate titles.
        titles = set(data.get('alternate_titles', []))
        for version in data['versions']:
            for key in ('title', '+short_title'):
                if key in version:
                    titles.add(version[key])
        try:
            # The primary title must not appear among the alternates.
            titles.remove(data['title'])
        except KeyError:
            pass
        data['alternate_titles'] = list(titles)

        if existing:
            data['_keywords'] = list(bill_keywords(data))
            update(existing, data, db.bills)
        else:
            # New bill: both timestamps start out as the same moment.
            now = datetime.datetime.utcnow()
            data['created_at'] = now
            data['updated_at'] = now
            data['_keywords'] = list(bill_keywords(data))
            insert_with_id(data)

    print('imported %s bill files' % len(paths))

    populate_current_fields(state)
    ensure_indexes()