def test_all_bills_categorization():
    """Check that ``categorize_bills`` fills in ``subjects`` on stored bills.

    Three subject mappings are stored for the abbreviation ``"ex"``, four
    dummy bills are inserted, and the categorizer is run over all of them.
    Each bill is then read back and its ``subjects`` compared, ignoring
    order, against the expected normalized subjects.
    """
    db.subjects.insert({"abbr": "ex", "remote": "AK-47", "normal": ["Guns", "Crime"]})
    db.subjects.insert({"abbr": "ex", "remote": "Hunting", "normal": ["Guns"]})
    db.subjects.insert({"abbr": "ex", "remote": "Candy", "normal": ["Food"]})

    categorizer = SubjectCategorizer("ex")

    # can insert dummy bills w/ state
    bills = [
        {"scraped_subjects": ["AK-47"], "bill_id": "1", "state": "ex"},
        {"scraped_subjects": ["Welfare"], "bill_id": "2", "state": "ex"},
        {"scraped_subjects": ["AK-47", "Candy"], "bill_id": "3", "state": "ex"},
        {"scraped_subjects": ["AK-47", "Hunting"], "bill_id": "4", "state": "ex"},
    ]
    # Explicit loop: a bare map() is lazy on Python 3 and would insert nothing.
    for dummy in bills:
        db.bills.insert(dummy)

    # run categorization on all bills
    categorizer.categorize_bills()

    # Subjects are sorted before comparing so the assertions do not depend on
    # the order the categorizer emits them in, yet still fail on duplicates.

    # simple
    bill = db.bills.find_one({"bill_id": "1"})
    assert_equal(sorted(bill["subjects"]), [u"Crime", u"Guns"])

    # no subjects
    bill = db.bills.find_one({"bill_id": "2"})
    assert_equal(bill["subjects"], [])

    # two subjects
    bill = db.bills.find_one({"bill_id": "3"})
    assert_equal(sorted(bill["subjects"]), [u"Crime", u"Food", u"Guns"])

    # avoid duplicates
    bill = db.bills.find_one({"bill_id": "4"})
    assert_equal(sorted(bill["subjects"]), [u"Crime", u"Guns"])
def test_basic_categorization():
    """Check ``categorize_bill`` on individual in-memory bill dicts.

    Three subject mappings are stored for the abbreviation ``'ex'``; each
    case builds a bill with ``scraped_subjects``, runs the categorizer on it
    and checks the resulting ``subjects``.
    """
    db.subjects.insert({'abbr': 'ex', 'remote': 'AK-47', 'normal': ['Guns', 'Crime']})
    db.subjects.insert({'abbr': 'ex', 'remote': 'Hunting', 'normal': ['Guns']})
    db.subjects.insert({'abbr': 'ex', 'remote': 'Candy', 'normal': ['Food']})

    categorizer = SubjectCategorizer('ex')

    # Subjects are sorted before every comparison so the test does not depend
    # on the order the categorizer emits them in, yet still fails on
    # duplicate entries.

    # simple
    bill = {'scraped_subjects': ['AK-47']}
    categorizer.categorize_bill(bill)
    bill['subjects'] = sorted(bill['subjects'])
    assert_equal(bill, {'scraped_subjects': ['AK-47'],
                        'subjects': [u'Crime', u'Guns']})

    # no subjects
    bill = {'scraped_subjects': ['Welfare']}
    categorizer.categorize_bill(bill)
    bill['subjects'] = sorted(bill['subjects'])
    assert_equal(bill, {'scraped_subjects': ['Welfare'], 'subjects': []})

    # two subjects
    bill = {'scraped_subjects': ['AK-47', 'Candy']}
    categorizer.categorize_bill(bill)
    assert_equal(sorted(bill['subjects']), [u'Crime', u'Food', u'Guns'])

    # avoid duplicates
    bill = {'scraped_subjects': ['AK-47', 'Hunting']}
    categorizer.categorize_bill(bill)
    bill['subjects'] = sorted(bill['subjects'])
    assert_equal(bill, {'scraped_subjects': ['AK-47', 'Hunting'],
                        'subjects': [u'Crime', u'Guns']})
def test_basic_categorization():
    """Check ``categorize_bill`` on individual in-memory bill dicts.

    Three subject mappings are stored for the abbreviation ``"ex"``; each
    case builds a bill with ``scraped_subjects``, runs the categorizer on it
    and checks the resulting ``subjects``.
    """
    db.subjects.insert({"abbr": "ex", "remote": "AK-47", "normal": ["Guns", "Crime"]})
    db.subjects.insert({"abbr": "ex", "remote": "Hunting", "normal": ["Guns"]})
    db.subjects.insert({"abbr": "ex", "remote": "Candy", "normal": ["Food"]})

    categorizer = SubjectCategorizer("ex")

    # Comparing sorted subjects keeps the assertions independent of the order
    # the categorizer emits them in while still failing on duplicates.

    # simple
    bill = {"scraped_subjects": ["AK-47"]}
    categorizer.categorize_bill(bill)
    bill["subjects"] = sorted(bill["subjects"])
    assert_equal(bill, {"scraped_subjects": ["AK-47"], "subjects": [u"Crime", u"Guns"]})

    # no subjects
    bill = {"scraped_subjects": ["Welfare"]}
    categorizer.categorize_bill(bill)
    bill["subjects"] = sorted(bill["subjects"])
    assert_equal(bill, {"scraped_subjects": ["Welfare"], "subjects": []})

    # two subjects
    bill = {"scraped_subjects": ["AK-47", "Candy"]}
    categorizer.categorize_bill(bill)
    assert_equal(sorted(bill["subjects"]), [u"Crime", u"Food", u"Guns"])

    # avoid duplicates
    bill = {"scraped_subjects": ["AK-47", "Hunting"]}
    categorizer.categorize_bill(bill)
    bill["subjects"] = sorted(bill["subjects"])
    assert_equal(
        bill,
        {"scraped_subjects": ["AK-47", "Hunting"], "subjects": [u"Crime", u"Guns"]},
    )
def test_all_bills_categorization():
    """Check that ``categorize_bills`` fills in ``subjects`` on stored bills.

    Three subject mappings are stored for the abbreviation ``'ex'``, four
    dummy bills are inserted, and the categorizer is run over all of them.
    Each bill is then read back and its ``subjects`` checked.
    """
    db.subjects.insert({
        'abbr': 'ex',
        'remote': 'AK-47',
        'normal': ['Guns', 'Crime']
    })
    db.subjects.insert({'abbr': 'ex', 'remote': 'Hunting', 'normal': ['Guns']})
    db.subjects.insert({'abbr': 'ex', 'remote': 'Candy', 'normal': ['Food']})

    categorizer = SubjectCategorizer('ex')

    # can insert dummy bills w/ state
    bills = [{
        'scraped_subjects': ['AK-47'],
        'bill_id': '1',
        'state': 'ex'
    }, {
        'scraped_subjects': ['Welfare'],
        'bill_id': '2',
        'state': 'ex'
    }, {
        'scraped_subjects': ['AK-47', 'Candy'],
        'bill_id': '3',
        'state': 'ex'
    }, {
        'scraped_subjects': ['AK-47', 'Hunting'],
        'bill_id': '4',
        'state': 'ex'
    }]
    # Explicit loop: a bare map() is lazy on Python 3 and would insert nothing.
    for dummy in bills:
        db.bills.insert(dummy)

    # run categorization on all bills
    categorizer.categorize_bills()

    # Sorted lists (rather than sets) are compared so that a duplicated
    # subject makes the assertion fail while ordering is still ignored.

    # simple
    bill = db.bills.find_one({'bill_id': '1'})
    assert_equal(sorted(bill['subjects']), [u'Crime', u'Guns'])

    # no subjects
    bill = db.bills.find_one({'bill_id': '2'})
    assert_equal(bill['subjects'], [])

    # two subjects
    bill = db.bills.find_one({'bill_id': '3'})
    assert_equal(sorted(bill['subjects']), [u'Crime', u'Food', u'Guns'])

    # avoid duplicates
    bill = db.bills.find_one({'bill_id': '4'})
    assert_equal(sorted(bill['subjects']), [u'Crime', u'Guns'])
def import_bills(abbr, data_dir):
    """Import every ``bills/*.json`` file found under ``data_dir/abbr``.

    Each file is parsed as JSON, passed through ``prepare_obj`` and handed to
    ``import_bill`` together with the votes from ``load_standalone_votes``
    and a ``SubjectCategorizer`` (or ``None`` when one cannot be built). The
    work is preceded by ``git_prelod(abbr)`` and followed by
    ``populate_current_fields(abbr)`` and ``git_commit("Import Update")``.

    Returns the ``counts`` dict: ``"total"`` is the number of files
    processed, and every ``import_bill`` return value is tallied under its
    own key (presumably always ``"update"`` or ``"insert"`` -- confirm
    against ``import_bill``).
    """
    abbr_dir = os.path.join(data_dir, abbr)
    bill_glob = os.path.join(abbr_dir, 'bills', '*.json')

    git_prelod(abbr)

    counts = {"update": 0, "insert": 0, "total": 0}
    votes = load_standalone_votes(abbr_dir)

    # The categorizer is optional: any failure while building it is logged
    # and the import carries on with None in its place.
    try:
        categorizer = SubjectCategorizer(abbr)
    except Exception as exc:
        logger.debug('Proceeding without subject categorizer: %s' % exc)
        categorizer = None

    bill_files = glob.glob(bill_glob)
    for bill_file in bill_files:
        with open(bill_file) as handle:
            raw = json.load(handle)
            data = prepare_obj(raw)

        counts["total"] += 1
        outcome = import_bill(data, votes, categorizer)
        counts[outcome] += 1

    logger.info('imported %s bill files' % len(bill_files))

    # Keys still present in votes at this point are reported as unmatched
    # (presumably import_bill removes the ones it matches -- confirm there).
    for unmatched in votes.keys():
        parts = tuple(piece.encode('ascii', 'replace') for piece in unmatched)
        logger.debug('Failed to match vote %s %s %s' % parts)

    populate_current_fields(abbr)

    git_commit("Import Update")

    return counts
def import_bills(abbr, data_dir):
    """Import every ``bills/*.json`` file found under ``data_dir/abbr``.

    Each file is parsed as JSON, passed through ``prepare_obj`` and handed to
    ``import_bill`` together with the result of ``import_votes`` and a
    ``SubjectCategorizer`` (or ``None`` when one cannot be built). Afterwards
    the ``level`` stored in the metadata document for ``abbr`` is passed to
    ``populate_current_fields`` and ``ensure_indexes`` is called.

    Returns the ``counts`` dict: ``"total"`` is the number of files
    processed, and every ``import_bill`` return value is tallied under its
    own key (presumably always ``"update"`` or ``"insert"`` -- confirm
    against ``import_bill``).
    """
    abbr_dir = os.path.join(data_dir, abbr)
    bill_glob = os.path.join(abbr_dir, 'bills', '*.json')

    counts = {"update": 0, "insert": 0, "total": 0}
    votes = import_votes(abbr_dir)

    # The categorizer is optional: any failure while building it is logged
    # and the import carries on with None in its place.
    try:
        categorizer = SubjectCategorizer(abbr)
    except Exception as exc:
        logger.debug('Proceeding without subject categorizer: %s' % exc)
        categorizer = None

    bill_files = glob.glob(bill_glob)
    for bill_file in bill_files:
        with open(bill_file) as handle:
            raw = json.load(handle)
            data = prepare_obj(raw)

        counts["total"] += 1
        outcome = import_bill(data, votes, categorizer)
        counts[outcome] += 1

    logger.info('imported %s bill files' % len(bill_files))

    # Keys still present in votes at this point are reported as unmatched
    # (presumably import_bill removes the ones it matches -- confirm there).
    for unmatched in votes.keys():
        parts = tuple(piece.encode('ascii', 'replace') for piece in unmatched)
        logger.debug('Failed to match vote %s %s %s' % parts)

    # The metadata document for this abbreviation supplies the level.
    level = db.metadata.find_one({'_id': abbr})['level']
    populate_current_fields(level, abbr)

    ensure_indexes()

    return counts
def test_all_bills_categorization():
    """Check that ``categorize_bills`` fills in ``subjects`` on stored bills.

    Three subject mappings are stored for the abbreviation ``'ex'``, four
    dummy bills are inserted, and the categorizer is run over all of them.
    Each bill is then read back and its ``subjects`` checked.
    """
    db.subjects.insert({'abbr': 'ex', 'remote': 'AK-47',
                        'normal': ['Guns', 'Crime']})
    db.subjects.insert({'abbr': 'ex', 'remote': 'Hunting', 'normal': ['Guns']})
    db.subjects.insert({'abbr': 'ex', 'remote': 'Candy', 'normal': ['Food']})

    categorizer = SubjectCategorizer('ex')

    # can insert dummy bills w/ state
    bills = [{'scraped_subjects': ['AK-47'], 'bill_id': '1', 'state': 'ex'},
             {'scraped_subjects': ['Welfare'], 'bill_id': '2', 'state': 'ex'},
             {'scraped_subjects': ['AK-47', 'Candy'], 'bill_id': '3',
              'state': 'ex'},
             {'scraped_subjects': ['AK-47', 'Hunting'], 'bill_id': '4',
              'state': 'ex'}]
    # A plain loop performs the inserts without building a throwaway list.
    for dummy in bills:
        db.bills.insert(dummy)

    # run categorization on all bills
    categorizer.categorize_bills()

    # Sorted lists (rather than sets) are compared so that a duplicated
    # subject makes the assertion fail while ordering is still ignored.

    # simple
    bill = db.bills.find_one({'bill_id': '1'})
    assert_equal(sorted(bill['subjects']), [u'Crime', u'Guns'])

    # no subjects
    bill = db.bills.find_one({'bill_id': '2'})
    assert_equal(bill['subjects'], [])

    # two subjects
    bill = db.bills.find_one({'bill_id': '3'})
    assert_equal(sorted(bill['subjects']), [u'Crime', u'Food', u'Guns'])

    # avoid duplicates
    bill = db.bills.find_one({'bill_id': '4'})
    assert_equal(sorted(bill['subjects']), [u'Crime', u'Guns'])
def test_basic_categorization():
    """Exercise ``categorize_bill`` on single in-memory bill dicts.

    Three subject mappings are stored for the abbreviation ``'ex'``. Each
    case categorizes a fresh bill, sorts its ``subjects`` and compares the
    result with the expected value.
    """
    seed_mappings = (
        ('AK-47', ['Guns', 'Crime']),
        ('Hunting', ['Guns']),
        ('Candy', ['Food']),
    )
    for remote, normal in seed_mappings:
        db.subjects.insert({'abbr': 'ex', 'remote': remote, 'normal': normal})

    categorizer = SubjectCategorizer('ex')

    def categorized(scraped):
        # Build a bill, categorize it, and sort its subjects.
        record = {'scraped_subjects': scraped}
        categorizer.categorize_bill(record)
        record['subjects'] = sorted(record['subjects'])
        return record

    # simple
    assert_equal(
        categorized(['AK-47']),
        {'scraped_subjects': ['AK-47'], 'subjects': [u'Crime', u'Guns']},
    )

    # no subjects
    assert_equal(
        categorized(['Welfare']),
        {'scraped_subjects': ['Welfare'], 'subjects': []},
    )

    # two subjects (only the subjects list is checked in this case)
    two_subjects = categorized(['AK-47', 'Candy'])
    assert_equal(two_subjects['subjects'], [u'Crime', u'Food', u'Guns'])

    # avoid duplicates
    assert_equal(
        categorized(['AK-47', 'Hunting']),
        {'scraped_subjects': ['AK-47', 'Hunting'],
         'subjects': [u'Crime', u'Guns']},
    )