Example #1
0
def test_all_bills_categorization():
    """Check that ``categorize_bills`` fills in ``subjects`` on stored bills.

    Registers three scraped-to-normalized subject mappings under the ``ex``
    abbreviation, inserts four dummy bills, runs the categorizer over all
    bills, and reads each bill back to verify its ``subjects`` field.
    """
    db.subjects.insert({"abbr": "ex", "remote": "AK-47", "normal": ["Guns", "Crime"]})
    db.subjects.insert({"abbr": "ex", "remote": "Hunting", "normal": ["Guns"]})
    db.subjects.insert({"abbr": "ex", "remote": "Candy", "normal": ["Food"]})

    categorizer = SubjectCategorizer("ex")

    # can insert dummy bills w/ state
    bills = [
        {"scraped_subjects": ["AK-47"], "bill_id": "1", "state": "ex"},
        {"scraped_subjects": ["Welfare"], "bill_id": "2", "state": "ex"},
        {"scraped_subjects": ["AK-47", "Candy"], "bill_id": "3", "state": "ex"},
        {"scraped_subjects": ["AK-47", "Hunting"], "bill_id": "4", "state": "ex"},
    ]
    # Explicit loop: a bare map() is lazy on Python 3 and would never
    # actually perform the inserts.
    for dummy_bill in bills:
        db.bills.insert(dummy_bill)

    # run categorization on all bills
    categorizer.categorize_bills()

    # Subjects are compared as sorted lists: that does not depend on the
    # order the categorizer emits them in, yet (unlike a set comparison)
    # still fails if a subject shows up twice.

    # simple
    bill = db.bills.find_one({"bill_id": "1"})
    assert_equal(sorted(bill["subjects"]), [u"Crime", u"Guns"])

    # no subjects
    bill = db.bills.find_one({"bill_id": "2"})
    assert_equal(bill["subjects"], [])

    # two subjects
    bill = db.bills.find_one({"bill_id": "3"})
    assert_equal(sorted(bill["subjects"]), [u"Crime", u"Food", u"Guns"])

    # avoid duplicates
    bill = db.bills.find_one({"bill_id": "4"})
    assert_equal(sorted(bill["subjects"]), [u"Crime", u"Guns"])
Example #2
0
def test_basic_categorization():
    """Check ``categorize_bill`` on individual in-memory bill dicts.

    Registers three scraped-to-normalized subject mappings under the ``ex``
    abbreviation, then categorizes bills with a known subject, an unknown
    subject, two known subjects, and two subjects that share a normalized
    value.
    """
    db.subjects.insert({'abbr': 'ex', 'remote': 'AK-47',
                        'normal': ['Guns', 'Crime']})
    db.subjects.insert({'abbr': 'ex', 'remote': 'Hunting', 'normal': ['Guns']})
    db.subjects.insert({'abbr': 'ex', 'remote': 'Candy', 'normal': ['Food']})

    categorizer = SubjectCategorizer('ex')

    # The order of bill['subjects'] is not relied upon: it is sorted before
    # every comparison so the checks stay deterministic while still catching
    # duplicated subjects.

    # simple
    bill = {'scraped_subjects': ['AK-47']}
    categorizer.categorize_bill(bill)
    bill['subjects'] = sorted(bill['subjects'])
    assert_equal(bill, {'scraped_subjects': ['AK-47'],
                        'subjects': [u'Crime', u'Guns']})

    # no subjects
    bill = {'scraped_subjects': ['Welfare']}
    categorizer.categorize_bill(bill)
    assert_equal(bill, {'scraped_subjects': ['Welfare'],
                        'subjects': []})

    # two subjects
    bill = {'scraped_subjects': ['AK-47', 'Candy']}
    categorizer.categorize_bill(bill)
    assert_equal(sorted(bill['subjects']), [u'Crime', u'Food', u'Guns'])

    # avoid duplicates
    bill = {'scraped_subjects': ['AK-47', 'Hunting']}
    categorizer.categorize_bill(bill)
    bill['subjects'] = sorted(bill['subjects'])
    assert_equal(bill, {'scraped_subjects': ['AK-47', 'Hunting'],
                        'subjects': [u'Crime', u'Guns']})
Example #3
0
def test_basic_categorization():
    """Check ``categorize_bill`` on individual in-memory bill dicts.

    Three scraped-to-normalized subject mappings are registered under the
    ``ex`` abbreviation; bills are then categorized one at a time and the
    resulting dict is compared against the expected value.
    """
    db.subjects.insert({"abbr": "ex", "remote": "AK-47", "normal": ["Guns", "Crime"]})
    db.subjects.insert({"abbr": "ex", "remote": "Hunting", "normal": ["Guns"]})
    db.subjects.insert({"abbr": "ex", "remote": "Candy", "normal": ["Food"]})

    categorizer = SubjectCategorizer("ex")

    def categorized(scraped_subjects):
        # Build a bill, categorize it, and normalize the subject order so
        # assertions neither depend on emission order nor hide duplicates.
        bill = {"scraped_subjects": scraped_subjects}
        categorizer.categorize_bill(bill)
        bill["subjects"] = sorted(bill["subjects"])
        return bill

    # simple
    assert_equal(
        categorized(["AK-47"]),
        {"scraped_subjects": ["AK-47"], "subjects": [u"Crime", u"Guns"]},
    )

    # no subjects
    assert_equal(
        categorized(["Welfare"]),
        {"scraped_subjects": ["Welfare"], "subjects": []},
    )

    # two subjects
    assert_equal(
        categorized(["AK-47", "Candy"])["subjects"],
        [u"Crime", u"Food", u"Guns"],
    )

    # avoid duplicates
    assert_equal(
        categorized(["AK-47", "Hunting"]),
        {"scraped_subjects": ["AK-47", "Hunting"], "subjects": [u"Crime", u"Guns"]},
    )
Example #4
0
def test_all_bills_categorization():
    """Check that ``categorize_bills`` fills in ``subjects`` on stored bills.

    Registers three scraped-to-normalized subject mappings under the ``ex``
    abbreviation, inserts four dummy bills, runs the categorizer over all
    bills, and reads each bill back to verify its ``subjects`` field.
    """
    db.subjects.insert({
        'abbr': 'ex',
        'remote': 'AK-47',
        'normal': ['Guns', 'Crime']
    })
    db.subjects.insert({'abbr': 'ex', 'remote': 'Hunting', 'normal': ['Guns']})
    db.subjects.insert({'abbr': 'ex', 'remote': 'Candy', 'normal': ['Food']})

    categorizer = SubjectCategorizer('ex')

    # can insert dummy bills w/ state
    bills = [{
        'scraped_subjects': ['AK-47'],
        'bill_id': '1',
        'state': 'ex'
    }, {
        'scraped_subjects': ['Welfare'],
        'bill_id': '2',
        'state': 'ex'
    }, {
        'scraped_subjects': ['AK-47', 'Candy'],
        'bill_id': '3',
        'state': 'ex'
    }, {
        'scraped_subjects': ['AK-47', 'Hunting'],
        'bill_id': '4',
        'state': 'ex'
    }]
    # Explicit loop: a bare map() is lazy on Python 3 and would never
    # actually perform the inserts.
    for dummy_bill in bills:
        db.bills.insert(dummy_bill)

    # run categorization on all bills
    categorizer.categorize_bills()

    # Sorted-list comparisons are order-independent like a set comparison,
    # but unlike a set they fail when a subject is present more than once.

    # simple
    bill = db.bills.find_one({'bill_id': '1'})
    assert_equal(sorted(bill['subjects']), [u'Crime', u'Guns'])

    # no subjects
    bill = db.bills.find_one({'bill_id': '2'})
    assert_equal(bill['subjects'], [])

    # two subjects
    bill = db.bills.find_one({'bill_id': '3'})
    assert_equal(sorted(bill['subjects']), [u'Crime', u'Food', u'Guns'])

    # avoid duplicates
    bill = db.bills.find_one({'bill_id': '4'})
    assert_equal(sorted(bill['subjects']), [u'Crime', u'Guns'])
Example #5
0
def import_bills(abbr, data_dir):
    """Import every bill JSON file found under ``<data_dir>/<abbr>/bills``.

    Each ``*.json`` file is loaded, passed through ``prepare_obj`` and handed
    to ``import_bill`` together with the standalone votes and, when one could
    be constructed, a ``SubjectCategorizer``. The value returned by
    ``import_bill`` is used as the key of the counter to bump.

    Returns the ``counts`` dict with the keys ``"update"``, ``"insert"`` and
    ``"total"``.
    """
    abbr_dir = os.path.join(data_dir, abbr)
    bill_glob = os.path.join(abbr_dir, 'bills', '*.json')

    git_prelod(abbr)

    counts = {"update": 0, "insert": 0, "total": 0}

    votes = load_standalone_votes(abbr_dir)

    # The categorizer is optional: if it cannot be built the import still
    # runs, just without subject categorization.
    categorizer = None
    try:
        categorizer = SubjectCategorizer(abbr)
    except Exception as e:
        logger.debug('Proceeding without subject categorizer: %s' % e)
        categorizer = None

    bill_files = glob.glob(bill_glob)
    for bill_file in bill_files:
        with open(bill_file) as handle:
            bill_data = prepare_obj(json.load(handle))

        counts["total"] += 1
        outcome = import_bill(bill_data, votes, categorizer)
        counts[outcome] += 1

    logger.info('imported %s bill files' % len(bill_files))

    # Whatever is still left in ``votes`` was not matched to a bill.
    for leftover in votes.keys():
        encoded = tuple(part.encode('ascii', 'replace') for part in leftover)
        logger.debug('Failed to match vote %s %s %s' % encoded)

    populate_current_fields(abbr)

    git_commit("Import Update")

    return counts
Example #6
0
def import_bills(abbr, data_dir):
    """Import every bill JSON file found under ``<data_dir>/<abbr>/bills``.

    Each ``*.json`` file is loaded, passed through ``prepare_obj`` and handed
    to ``import_bill`` together with the result of ``import_votes`` and, when
    one could be constructed, a ``SubjectCategorizer``. The value returned by
    ``import_bill`` is used as the key of the counter to bump. Afterwards the
    current fields are populated using the ``level`` stored in the metadata
    document for ``abbr``, and ``ensure_indexes`` is called.

    Returns the ``counts`` dict with the keys ``"update"``, ``"insert"`` and
    ``"total"``.
    """
    abbr_dir = os.path.join(data_dir, abbr)
    bill_glob = os.path.join(abbr_dir, 'bills', '*.json')

    counts = {"update": 0, "insert": 0, "total": 0}

    votes = import_votes(abbr_dir)

    # The categorizer is optional: if it cannot be built the import still
    # runs, just without subject categorization.
    categorizer = None
    try:
        categorizer = SubjectCategorizer(abbr)
    except Exception as e:
        logger.debug('Proceeding without subject categorizer: %s' % e)
        categorizer = None

    bill_files = glob.glob(bill_glob)
    for bill_file in bill_files:
        with open(bill_file) as handle:
            bill_data = prepare_obj(json.load(handle))

        counts["total"] += 1
        outcome = import_bill(bill_data, votes, categorizer)
        counts[outcome] += 1

    logger.info('imported %s bill files' % len(bill_files))

    # Whatever is still left in ``votes`` was not matched to a bill.
    for leftover in votes.keys():
        encoded = tuple(part.encode('ascii', 'replace') for part in leftover)
        logger.debug('Failed to match vote %s %s %s' % encoded)

    metadata = db.metadata.find_one({'_id': abbr})
    populate_current_fields(metadata['level'], abbr)

    ensure_indexes()

    return counts
Example #7
0
def test_all_bills_categorization():
    """Check that ``categorize_bills`` fills in ``subjects`` on stored bills.

    Registers three scraped-to-normalized subject mappings under the ``ex``
    abbreviation, inserts four dummy bills, runs the categorizer over all
    bills, and reads each bill back to verify its ``subjects`` field.
    """
    db.subjects.insert({'abbr': 'ex', 'remote': 'AK-47',
                        'normal': ['Guns', 'Crime']})
    db.subjects.insert({'abbr': 'ex', 'remote': 'Hunting', 'normal': ['Guns']})
    db.subjects.insert({'abbr': 'ex', 'remote': 'Candy', 'normal': ['Food']})

    categorizer = SubjectCategorizer('ex')

    # can insert dummy bills w/ state
    bills = [{'scraped_subjects': ['AK-47'], 'bill_id': '1', 'state': 'ex'},
             {'scraped_subjects': ['Welfare'], 'bill_id': '2', 'state': 'ex'},
             {'scraped_subjects': ['AK-47', 'Candy'], 'bill_id': '3',
              'state': 'ex'},
             {'scraped_subjects': ['AK-47', 'Hunting'], 'bill_id': '4',
              'state': 'ex'}]
    # A plain loop performs the inserts without building a throwaway list
    # of return values.
    for dummy_bill in bills:
        db.bills.insert(dummy_bill)

    # run categorization on all bills
    categorizer.categorize_bills()

    # Sorted-list comparisons are order-independent like a set comparison,
    # but unlike a set they fail when a subject is present more than once.

    # simple
    bill = db.bills.find_one({'bill_id': '1'})
    assert_equal(sorted(bill['subjects']), [u'Crime', u'Guns'])

    # no subjects
    bill = db.bills.find_one({'bill_id': '2'})
    assert_equal(bill['subjects'], [])

    # two subjects
    bill = db.bills.find_one({'bill_id': '3'})
    assert_equal(sorted(bill['subjects']), [u'Crime', u'Food', u'Guns'])

    # avoid duplicates
    bill = db.bills.find_one({'bill_id': '4'})
    assert_equal(sorted(bill['subjects']), [u'Crime', u'Guns'])
Example #8
0
def test_basic_categorization():
    """Check ``categorize_bill`` on individual in-memory bill dicts.

    Three scraped-to-normalized subject mappings are registered under the
    ``ex`` abbreviation; bills are then categorized one at a time, their
    ``subjects`` sorted, and the result compared against the expected value.
    """
    subject_mappings = (
        ('AK-47', ['Guns', 'Crime']),
        ('Hunting', ['Guns']),
        ('Candy', ['Food']),
    )
    for remote, normal in subject_mappings:
        db.subjects.insert({'abbr': 'ex', 'remote': remote, 'normal': normal})

    categorizer = SubjectCategorizer('ex')

    def categorized(scraped_subjects):
        # Build a bill, categorize it in place, and sort its subjects so the
        # comparison below is stable.
        doc = {'scraped_subjects': scraped_subjects}
        categorizer.categorize_bill(doc)
        doc['subjects'] = sorted(doc['subjects'])
        return doc

    # simple
    expected = {'scraped_subjects': ['AK-47'],
                'subjects': [u'Crime', u'Guns']}
    assert_equal(categorized(['AK-47']), expected)

    # no subjects
    expected = {'scraped_subjects': ['Welfare'], 'subjects': []}
    assert_equal(categorized(['Welfare']), expected)

    # two subjects
    result = categorized(['AK-47', 'Candy'])
    assert_equal(result['subjects'], [u'Crime', u'Food', u'Guns'])

    # avoid duplicates
    expected = {'scraped_subjects': ['AK-47', 'Hunting'],
                'subjects': [u'Crime', u'Guns']}
    assert_equal(categorized(['AK-47', 'Hunting']), expected)