def import_bills(abbr, data_dir):
    """Import every scraped bill JSON file found for ``abbr``.

    Each ``bills/*.json`` file under ``<data_dir>/<abbr>`` is loaded, run
    through ``prepare_obj`` and handed to ``import_bill`` along with the
    standalone votes and, when one could be built, a subject categorizer.

    Returns a dict with the keys ``"update"``, ``"insert"`` and ``"total"``:
    ``"total"`` is bumped once per file, and the entry named by the value
    ``import_bill`` returns is bumped once per file as well.
    """
    abbr_dir = os.path.join(data_dir, abbr)
    bill_glob = os.path.join(abbr_dir, 'bills', '*.json')

    git_prelod(abbr)

    tally = {"update": 0, "insert": 0, "total": 0}
    votes = load_standalone_votes(abbr_dir)

    # The categorizer is optional: any failure building it is logged and the
    # import carries on with ``None`` in its place.
    try:
        categorizer = SubjectCategorizer(abbr)
    except Exception as err:
        logger.debug('Proceeding without subject categorizer: %s' % err)
        categorizer = None

    bill_files = glob.glob(bill_glob)
    for bill_file in bill_files:
        with open(bill_file) as handle:
            raw = json.load(handle)
        bill = prepare_obj(raw)

        tally["total"] += 1
        outcome = import_bill(bill, votes, categorizer)
        tally[outcome] += 1

    logger.info('imported %s bill files' % len(bill_files))

    # Whatever keys are still in ``votes`` at this point are reported as
    # unmatched (presumably import_bill consumes the ones it matches --
    # confirm against import_bill).
    for unmatched in votes.keys():
        parts = tuple(piece.encode('ascii', 'replace') for piece in unmatched)
        logger.debug('Failed to match vote %s %s %s' % parts)

    populate_current_fields(abbr)

    git_commit("Import Update")

    return tally
def test_basic_categorization():
    """Exercise ``SubjectCategorizer.categorize_bill`` on single bill dicts.

    Three remote-to-normal subject mappings are stored for the ``'ex'``
    abbreviation, then bills with different ``scraped_subjects`` are
    categorized in place and their resulting ``subjects`` checked.
    """
    subject_rows = [
        ('AK-47', ['Guns', 'Crime']),
        ('Hunting', ['Guns']),
        ('Candy', ['Food']),
    ]
    for remote, normal in subject_rows:
        db.subjects.insert({'abbr': 'ex', 'remote': remote, 'normal': normal})

    categorizer = SubjectCategorizer('ex')

    # simple: one scraped subject that has a mapping
    simple_bill = {'scraped_subjects': ['AK-47']}
    categorizer.categorize_bill(simple_bill)
    expected_simple = {
        'scraped_subjects': ['AK-47'],
        'subjects': [u'Guns', u'Crime'],
    }
    assert_equal(simple_bill, expected_simple)

    # no subjects: 'Welfare' has no stored mapping
    unmapped_bill = {'scraped_subjects': ['Welfare']}
    categorizer.categorize_bill(unmapped_bill)
    expected_unmapped = {'scraped_subjects': ['Welfare'], 'subjects': []}
    assert_equal(unmapped_bill, expected_unmapped)

    # two subjects: mappings are combined (order is not checked here)
    combined_bill = {'scraped_subjects': ['AK-47', 'Candy']}
    categorizer.categorize_bill(combined_bill)
    expected_combined = set([u'Guns', u'Crime', u'Food'])
    assert_equal(set(combined_bill['subjects']), expected_combined)

    # avoid duplicates: 'Guns' is mapped from both scraped subjects
    overlapping_bill = {'scraped_subjects': ['AK-47', 'Hunting']}
    categorizer.categorize_bill(overlapping_bill)
    expected_overlapping = {
        'scraped_subjects': ['AK-47', 'Hunting'],
        'subjects': [u'Guns', u'Crime'],
    }
    assert_equal(overlapping_bill, expected_overlapping)
def import_bills(abbr, data_dir):
    """Import every scraped bill JSON file found for ``abbr``.

    Each ``bills/*.json`` file under ``<data_dir>/<abbr>`` is loaded, run
    through ``prepare_obj`` and handed to ``import_bill`` along with the
    result of ``import_votes`` and, when one could be built, a subject
    categorizer. Afterwards the current fields are repopulated using the
    ``level`` stored in the metadata document for ``abbr`` and
    ``ensure_indexes`` is called.

    Returns a dict with the keys ``"update"``, ``"insert"`` and ``"total"``:
    ``"total"`` is bumped once per file, and the entry named by the value
    ``import_bill`` returns is bumped once per file as well.
    """
    abbr_dir = os.path.join(data_dir, abbr)
    bill_glob = os.path.join(abbr_dir, 'bills', '*.json')

    tally = {"update": 0, "insert": 0, "total": 0}
    votes = import_votes(abbr_dir)

    # The categorizer is optional: any failure building it is logged and the
    # import carries on with ``None`` in its place.
    try:
        categorizer = SubjectCategorizer(abbr)
    except Exception as err:
        logger.debug('Proceeding without subject categorizer: %s' % err)
        categorizer = None

    bill_files = glob.glob(bill_glob)
    for bill_file in bill_files:
        with open(bill_file) as handle:
            raw = json.load(handle)
        bill = prepare_obj(raw)

        tally["total"] += 1
        outcome = import_bill(bill, votes, categorizer)
        tally[outcome] += 1

    logger.info('imported %s bill files' % len(bill_files))

    # Whatever keys are still in ``votes`` at this point are reported as
    # unmatched (presumably import_bill consumes the ones it matches --
    # confirm against import_bill).
    for unmatched in votes.keys():
        parts = tuple(piece.encode('ascii', 'replace') for piece in unmatched)
        logger.debug('Failed to match vote %s %s %s' % parts)

    # The metadata document keyed by ``abbr`` supplies the level argument.
    level = db.metadata.find_one({'_id': abbr})['level']
    populate_current_fields(level, abbr)

    ensure_indexes()

    return tally
def test_all_bills_categorization():
    """Exercise ``SubjectCategorizer.categorize_bills`` on stored bills.

    Three remote-to-normal subject mappings are stored for the ``'ex'``
    abbreviation, four dummy bills are inserted into ``db.bills``, and after
    ``categorize_bills()`` runs each bill is fetched back by ``bill_id`` and
    its ``subjects`` checked.
    """
    db.subjects.insert({
        'abbr': 'ex',
        'remote': 'AK-47',
        'normal': ['Guns', 'Crime'],
    })
    db.subjects.insert({'abbr': 'ex', 'remote': 'Hunting',
                        'normal': ['Guns']})
    db.subjects.insert({'abbr': 'ex', 'remote': 'Candy', 'normal': ['Food']})

    categorizer = SubjectCategorizer('ex')

    # can insert dummy bills w/ state
    bills = [
        {'scraped_subjects': ['AK-47'], 'bill_id': '1', 'state': 'ex'},
        {'scraped_subjects': ['Welfare'], 'bill_id': '2', 'state': 'ex'},
        {'scraped_subjects': ['AK-47', 'Candy'], 'bill_id': '3',
         'state': 'ex'},
        {'scraped_subjects': ['AK-47', 'Hunting'], 'bill_id': '4',
         'state': 'ex'},
    ]
    # Insert with an explicit loop rather than ``map``: ``map`` is lazy on
    # Python 3, so a discarded ``map(...)`` call would insert nothing.
    for bill in bills:
        db.bills.insert(bill)

    # run categorization on all bills
    categorizer.categorize_bills()

    # simple: one scraped subject that has a mapping
    bill = db.bills.find_one({'bill_id': '1'})
    assert_equal(set(bill['subjects']), set([u'Guns', u'Crime']))

    # no subjects: 'Welfare' has no stored mapping
    bill = db.bills.find_one({'bill_id': '2'})
    assert_equal(bill['subjects'], [])

    # two subjects: mappings are combined
    bill = db.bills.find_one({'bill_id': '3'})
    assert_equal(set(bill['subjects']), set([u'Guns', u'Crime', u'Food']))

    # avoid duplicates: 'Guns' is mapped from both scraped subjects
    bill = db.bills.find_one({'bill_id': '4'})
    assert_equal(set(bill['subjects']), set([u'Guns', u'Crime']))